Diffstat (limited to 'qa'): 3788 files changed, 185755 insertions, 0 deletions
diff --git a/qa/.gitignore b/qa/.gitignore new file mode 100644 index 000000000..e80d9d42a --- /dev/null +++ b/qa/.gitignore @@ -0,0 +1,4 @@ +*~ +.*.sw[nmop] +*.pyc +.tox @@ -0,0 +1 @@ +.
\ No newline at end of file
diff --git a/qa/CMakeLists.txt b/qa/CMakeLists.txt
new file mode 100644
index 000000000..a8726141b
--- /dev/null
+++ b/qa/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(CEPH_BUILD_VIRTUALENV $ENV{TMPDIR})
+if(NOT CEPH_BUILD_VIRTUALENV)
+  set(CEPH_BUILD_VIRTUALENV ${CMAKE_BINARY_DIR})
+endif()
+
+if(WITH_TESTS)
+  include(AddCephTest)
+  add_tox_test(qa TOX_ENVS flake8 mypy deadsymlinks)
+endif()
diff --git a/qa/Makefile b/qa/Makefile
new file mode 100644
index 000000000..ad655b7e7
--- /dev/null
+++ b/qa/Makefile
@@ -0,0 +1,4 @@
+DIRS= workunits btrfs
+
+all:
+	for d in $(DIRS) ; do ( cd $$d ; $(MAKE) all ) ; done
diff --git a/qa/README b/qa/README
new file mode 100644
index 000000000..f9b8988c6
--- /dev/null
+++ b/qa/README
@@ -0,0 +1,85 @@
+ceph-qa-suite
+-------------
+
+clusters/ - some predefined cluster layouts
+suites/   - the test suites
+
+The suites directory has a hierarchical collection of tests. This can be
+freeform, but generally follows the convention of
+
+    suites/<test suite name>/<test group>/...
+
+A test is described by a yaml fragment (a minimal example fragment is
+sketched below).
+
+A test can exist as a single .yaml file in the directory tree. For example:
+
+    suites/foo/one.yaml
+    suites/foo/two.yaml
+
+is a simple group of two tests.
+
+A directory with a magic '+' file represents a test that combines all
+other items in the directory into a single yaml fragment. For example:
+
+    suites/foo/bar/+
+    suites/foo/bar/a.yaml
+    suites/foo/bar/b.yaml
+    suites/foo/bar/c.yaml
+
+is a single test consisting of a + b + c.
+
+A directory with a magic '%' file represents a test matrix formed from
+all other items in the directory. For example,
+
+    suites/baz/%
+    suites/baz/a.yaml
+    suites/baz/b/b1.yaml
+    suites/baz/b/b2.yaml
+    suites/baz/c.yaml
+    suites/baz/d/d1.yaml
+    suites/baz/d/d2.yaml
+
+is a 4-dimensional test matrix. Two dimensions (a, c) are trivial (1
+item), so this is really 2x2 = 4 tests, which are
+
+    a + b1 + c + d1
+    a + b1 + c + d2
+    a + b2 + c + d1
+    a + b2 + c + d2
+
+A directory with a magic '$' file, or a directory whose name ends with '$',
+represents a test where one of the non-magic items is chosen randomly. For
+example, both
+
+    suites/foo/$
+    suites/foo/a.yaml
+    suites/foo/b.yaml
+    suites/foo/c.yaml
+
+and
+
+    suites/foo$/a.yaml
+    suites/foo$/b.yaml
+    suites/foo$/c.yaml
+
+describe a single test, either a, b or c. This can be used in conjunction
+with the '%' file in the same directory (see below) or in other directories
+to run a series of tests without causing an unwanted increase in the total
+number of jobs run.
+
+Symlinks are okay.
+
+One particular use of symlinks is to combine the '%' feature with the latter
+form of '$'. Consider a supported_distros directory containing fragments that
+define os_type and os_version:
+
+    supported_distros/%
+    supported_distros/centos.yaml
+    supported_distros/rhel.yaml
+    supported_distros/ubuntu.yaml
+
+A test that links supported_distros as distros (a name that doesn't end with
+'$') will be run three times: on centos, rhel and ubuntu. A test that links
+supported_distros as distros$ will be run just once: either on centos, rhel or
+ubuntu, chosen randomly.
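For illustration only, a standalone test fragment of the kind described above might look like the following sketch. The path suites/foo/one.yaml is hypothetical; the install, ceph, and workunit tasks are modelled on fragments added elsewhere in this commit (e.g. qa/cephfs/begin/0-install.yaml and qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml):

    # suites/foo/one.yaml -- hypothetical standalone test fragment
    tasks:
    - install:          # install the ceph packages on the test nodes
    - ceph:             # bring up a cluster using the roles from the cluster yaml
    - workunit:
        clients:
          all:
            - suites/fsstress.sh   # run the fsstress workunit on every client

When teuthology builds a job from several fragments (via '+', '%' or '$'), the fragments are merged, so a real suite usually splits the install/cluster/mount pieces and the workload into separate yaml files as the directories in this commit do.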
+ +The teuthology code can be found in https://github.com/ceph/teuthology.git diff --git a/qa/archs/aarch64.yaml b/qa/archs/aarch64.yaml new file mode 100644 index 000000000..6399b9959 --- /dev/null +++ b/qa/archs/aarch64.yaml @@ -0,0 +1 @@ +arch: aarch64 diff --git a/qa/archs/armv7.yaml b/qa/archs/armv7.yaml new file mode 100644 index 000000000..c261ebd52 --- /dev/null +++ b/qa/archs/armv7.yaml @@ -0,0 +1 @@ +arch: armv7l diff --git a/qa/archs/i686.yaml b/qa/archs/i686.yaml new file mode 100644 index 000000000..a920e5a9e --- /dev/null +++ b/qa/archs/i686.yaml @@ -0,0 +1 @@ +arch: i686 diff --git a/qa/archs/x86_64.yaml b/qa/archs/x86_64.yaml new file mode 100644 index 000000000..c2409f5d0 --- /dev/null +++ b/qa/archs/x86_64.yaml @@ -0,0 +1 @@ +arch: x86_64 diff --git a/qa/btrfs/.gitignore b/qa/btrfs/.gitignore new file mode 100644 index 000000000..530c1b5b4 --- /dev/null +++ b/qa/btrfs/.gitignore @@ -0,0 +1,3 @@ +/clone_range +/test_async_snap +/create_async_snap diff --git a/qa/btrfs/Makefile b/qa/btrfs/Makefile new file mode 100644 index 000000000..be95ecfd3 --- /dev/null +++ b/qa/btrfs/Makefile @@ -0,0 +1,11 @@ +CFLAGS = -Wall -Wextra -D_GNU_SOURCE + +TARGETS = clone_range test_async_snap create_async_snap + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git a/qa/btrfs/clone_range.c b/qa/btrfs/clone_range.c new file mode 100644 index 000000000..0a88e1601 --- /dev/null +++ b/qa/btrfs/clone_range.c @@ -0,0 +1,35 @@ +#include <fcntl.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" +#include <stdio.h> +#include <errno.h> + +int main(int argc, char **argv) +{ + struct btrfs_ioctl_clone_range_args ca; + int dfd; + int r; + + if (argc < 6) { + printf("usage: %s <srcfn> <srcoffset> <srclen> <destfn> <destoffset>\n", argv[0]); + exit(1); + } + + ca.src_fd = open(argv[1], O_RDONLY); + ca.src_offset = atoi(argv[2]); + ca.src_length = atoi(argv[3]); + dfd = open(argv[4], O_WRONLY|O_CREAT); + ca.dest_offset = atoi(argv[5]); + + r = ioctl(dfd, BTRFS_IOC_CLONE_RANGE, &ca); + printf("clone_range %s %lld %lld~%lld to %s %d %lld = %d %s\n", + argv[1], ca.src_fd, + ca.src_offset, ca.src_length, + argv[4], dfd, + ca.dest_offset, r, strerror(errno)); + return r; +} diff --git a/qa/btrfs/create_async_snap.c b/qa/btrfs/create_async_snap.c new file mode 100644 index 000000000..2ef22af7b --- /dev/null +++ b/qa/btrfs/create_async_snap.c @@ -0,0 +1,34 @@ +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/ioctl.h> +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" + +struct btrfs_ioctl_vol_args_v2 va; + +int main(int argc, char **argv) +{ + int fd; + int r; + + if (argc != 3) { + printf("usage: %s <source subvol> <name>\n", argv[0]); + return 1; + } + printf("creating snap ./%s from %s\n", argv[2], argv[1]); + fd = open(".", O_RDONLY); + va.fd = open(argv[1], O_RDONLY); + va.flags = BTRFS_SUBVOL_CREATE_ASYNC; + strcpy(va.name, argv[2]); + r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va); + printf("result %d\n", r ? 
-errno:0); + return r; +} diff --git a/qa/btrfs/test_async_snap.c b/qa/btrfs/test_async_snap.c new file mode 100644 index 000000000..211be95a6 --- /dev/null +++ b/qa/btrfs/test_async_snap.c @@ -0,0 +1,83 @@ +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/ioctl.h> +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" + +struct btrfs_ioctl_vol_args_v2 va; +struct btrfs_ioctl_vol_args vold; +int max = 4; + +void check_return(int r) +{ + if (r < 0) { + printf("********* failed with %d %s ********\n", errno, strerror(errno)); + exit(1); + } +} + +int main(int argc, char **argv) +{ + int num = 1000; + + if (argc > 1) + num = atoi(argv[1]); + printf("will do %d iterations\n", num); + + int cwd = open(".", O_RDONLY); + printf("cwd = %d\n", cwd); + while (num-- > 0) { + if (rand() % 10 == 0) { + __u64 transid; + int r; + printf("sync starting\n"); + r = ioctl(cwd, BTRFS_IOC_START_SYNC, &transid); + check_return(r); + printf("sync started, transid %lld, waiting\n", transid); + r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &transid); + check_return(r); + printf("sync finished\n"); + } + + int i = rand() % max; + struct stat st; + va.fd = cwd; + sprintf(va.name, "test.%d", i); + va.transid = 0; + int r = stat(va.name, &st); + if (r < 0) { + if (rand() % 3 == 0) { + printf("snap create (sync) %s\n", va.name); + va.flags = 0; + r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va); + check_return(r); + } else { + printf("snap create (async) %s\n", va.name); + va.flags = BTRFS_SUBVOL_CREATE_ASYNC; + r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va); + check_return(r); + printf("snap created, transid %lld\n", va.transid); + if (rand() % 2 == 0) { + printf("waiting for async snap create\n"); + r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &va.transid); + check_return(r); + } + } + } else { + printf("snap remove %s\n", va.name); + vold.fd = va.fd; + strcpy(vold.name, va.name); + r = ioctl(cwd, BTRFS_IOC_SNAP_DESTROY, &vold); + check_return(r); + } + } + return 0; +} diff --git a/qa/btrfs/test_rmdir_async_snap.c b/qa/btrfs/test_rmdir_async_snap.c new file mode 100644 index 000000000..5dafaacaa --- /dev/null +++ b/qa/btrfs/test_rmdir_async_snap.c @@ -0,0 +1,62 @@ +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/ioctl.h> +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" + +struct btrfs_ioctl_vol_args_v2 va; +struct btrfs_ioctl_vol_args vold; + +int main(int argc, char **argv) +{ + int num = 1000; + int i, r, fd; + char buf[30]; + + if (argc > 1) + num = atoi(argv[1]); + printf("will do %d iterations\n", num); + + fd = open(".", O_RDONLY); + vold.fd = 0; + strcpy(vold.name, "current"); + r = ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, (unsigned long int)&vold); + printf("create current ioctl got %d\n", r ? errno:0); + if (r) + return 1; + + for (i=0; i<num; i++) { + sprintf(buf, "current/dir.%d", i); + r = mkdir(buf, 0755); + printf("mkdir got %d\n", r ? errno:0); + if (r) + return 1; + } + + va.fd = open("current", O_RDONLY); + va.flags = BTRFS_SUBVOL_CREATE_ASYNC; + for (i=0; i<num; i++) { + system("/bin/cp /boot/vmlinuz-3.2.0-ceph-00142-g9e98323 current/foo"); + sprintf(buf, "current/dir.%d", i); + r = rmdir(buf); + printf("rmdir got %d\n", r ? 
errno:0); + if (r) + return 1; + + if (i % 10) continue; + sprintf(va.name, "snap.%d", i); + r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va); + printf("ioctl got %d\n", r ? errno:0); + if (r) + return 1; + } + return 0; +} diff --git a/qa/cephfs/.qa b/qa/cephfs/.qa new file mode 120000 index 000000000..b870225aa --- /dev/null +++ b/qa/cephfs/.qa @@ -0,0 +1 @@ +../
\ No newline at end of file diff --git a/qa/cephfs/begin/+ b/qa/cephfs/begin/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/begin/+ diff --git a/qa/cephfs/begin/0-install.yaml b/qa/cephfs/begin/0-install.yaml new file mode 100644 index 000000000..413958f11 --- /dev/null +++ b/qa/cephfs/begin/0-install.yaml @@ -0,0 +1,65 @@ +tasks: + - install: + extra_packages: + rpm: + - python3-cephfs + - cephfs-top + - cephfs-mirror + deb: + - python3-cephfs + - cephfs-shell + - cephfs-top + - cephfs-mirror + # For kernel_untar_build workunit + extra_system_packages: + deb: + - bison + - flex + - libelf-dev + - libssl-dev + - network-manager + - iproute2 + - util-linux + # for xfstests-dev + - dump + - indent + # for fsx + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + # for postgres + - postgresql + - postgresql-client + - postgresql-common + - postgresql-contrib + rpm: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - NetworkManager + - iproute + - util-linux + # for xfstests-dev + - libacl-devel + - libaio-devel + - libattr-devel + - libtool + - libuuid-devel + - xfsdump + - xfsprogs + - xfsprogs-devel + # for fsx + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel + # for postgres + - postgresql + - postgresql-server + - postgresql-contrib +syslog: + ignorelist: + - WARNING*.*check_session_state + - WARNING*.*__ceph_remove_cap diff --git a/qa/cephfs/begin/1-ceph.yaml b/qa/cephfs/begin/1-ceph.yaml new file mode 100644 index 000000000..531c8e3e0 --- /dev/null +++ b/qa/cephfs/begin/1-ceph.yaml @@ -0,0 +1,6 @@ +log-rotate: + ceph-mds: 10G + ceph-osd: 10G +tasks: + - ceph: + create_rbd_pool: false diff --git a/qa/cephfs/begin/2-logrotate.yaml b/qa/cephfs/begin/2-logrotate.yaml new file mode 100644 index 000000000..3b876f13b --- /dev/null +++ b/qa/cephfs/begin/2-logrotate.yaml @@ -0,0 +1,3 @@ +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/qa/cephfs/clusters/.qa b/qa/cephfs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/clusters/1-mds-1-client-coloc.yaml b/qa/cephfs/clusters/1-mds-1-client-coloc.yaml new file mode 100644 index 000000000..d295dc3d3 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-1-client-micro.yaml b/qa/cephfs/clusters/1-mds-1-client-micro.yaml new file mode 100644 index 000000000..8b66c3906 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-1-client-micro.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, osd.1, osd.2, osd.3] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/cephfs/clusters/1-mds-1-client.yaml b/qa/cephfs/clusters/1-mds-1-client.yaml new file mode 100644 index 000000000..d7701815f --- /dev/null +++ b/qa/cephfs/clusters/1-mds-1-client.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-2-client-coloc.yaml b/qa/cephfs/clusters/1-mds-2-client-coloc.yaml new file mode 100644 index 000000000..5ce128cbd --- /dev/null +++ b/qa/cephfs/clusters/1-mds-2-client-coloc.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7, client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-2-client-micro.yaml b/qa/cephfs/clusters/1-mds-2-client-micro.yaml new file mode 100644 index 000000000..42ad4efcd --- /dev/null +++ b/qa/cephfs/clusters/1-mds-2-client-micro.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, mds.a, mds.b, mds.c, osd.0, osd.1, osd.2, osd.3] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/cephfs/clusters/1-mds-2-client.yaml b/qa/cephfs/clusters/1-mds-2-client.yaml new file mode 100644 index 000000000..6e996244e --- /dev/null +++ b/qa/cephfs/clusters/1-mds-2-client.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-3-client.yaml b/qa/cephfs/clusters/1-mds-3-client.yaml new file mode 100644 index 000000000..200df1fe0 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-3-client.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +- [client.0] +- [client.1] +- [client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-4-client-coloc.yaml b/qa/cephfs/clusters/1-mds-4-client-coloc.yaml new file mode 100644 index 000000000..3e2ee7870 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-4-client-coloc.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, 
client.0, client.1] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7, client.2, client.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-4-client.yaml b/qa/cephfs/clusters/1-mds-4-client.yaml new file mode 100644 index 000000000..9addfe3b0 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-4-client.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.b, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.c, osd.4, osd.5, osd.6, osd.7] +- [client.0] +- [client.1] +- [client.2] +- [client.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml new file mode 100644 index 000000000..865b976c6 --- /dev/null +++ b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 5 # GB +- machine: + disk: 10 # GB diff --git a/qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml b/qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml new file mode 100644 index 000000000..ca6f79ba2 --- /dev/null +++ b/qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, mds.a, mds.d, mds.g, mds.j, osd.0, osd.3, osd.6, osd.9, client.0] +- [mon.b, mgr.y, mds.b, mds.e, mds.h, mds.k, osd.1, osd.4, osd.7, osd.10] +- [mon.c, mgr.z, mds.c, mds.f, mds.i, mds.l, osd.2, osd.5, osd.8, osd.11] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a2s-mds-1c-client-3node.yaml b/qa/cephfs/clusters/1a2s-mds-1c-client-3node.yaml new file mode 100644 index 000000000..3a9ff34f8 --- /dev/null +++ b/qa/cephfs/clusters/1a2s-mds-1c-client-3node.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.3, osd.6, osd.9, client.0] +- [mon.b, mgr.y, mds.b, osd.1, osd.4, osd.7, osd.10] +- [mon.c, mgr.z, mds.c, osd.2, osd.5, osd.8, osd.11] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a3s-mds-1c-client.yaml b/qa/cephfs/clusters/1a3s-mds-1c-client.yaml new file mode 100644 index 000000000..87867daa4 --- /dev/null +++ b/qa/cephfs/clusters/1a3s-mds-1c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, mds.d, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a3s-mds-2c-client.yaml b/qa/cephfs/clusters/1a3s-mds-2c-client.yaml new file mode 100644 index 000000000..ba17f05d2 --- /dev/null +++ b/qa/cephfs/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, mds.d, osd.4, osd.5, osd.6, osd.7, client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a3s-mds-4c-client.yaml b/qa/cephfs/clusters/1a3s-mds-4c-client.yaml new file mode 100644 index 000000000..51fd809b4 --- /dev/null +++ b/qa/cephfs/clusters/1a3s-mds-4c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3, client.0, client.2] +- [mon.b, mon.c, mgr.x, mds.b, 
mds.d, osd.4, osd.5, osd.6, osd.7, client.1, client.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a5s-mds-1c-client-3node.yaml b/qa/cephfs/clusters/1a5s-mds-1c-client-3node.yaml new file mode 100644 index 000000000..ae723c160 --- /dev/null +++ b/qa/cephfs/clusters/1a5s-mds-1c-client-3node.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, mds.a, mds.d, osd.0, osd.3, osd.6, osd.9, client.0] +- [mon.b, mgr.y, mds.b, mds.e, osd.1, osd.4, osd.7, osd.10] +- [mon.c, mgr.z, mds.c, mds.f, osd.2, osd.5, osd.8, osd.11] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a5s-mds-1c-client.yaml b/qa/cephfs/clusters/1a5s-mds-1c-client.yaml new file mode 100644 index 000000000..79fd84cf4 --- /dev/null +++ b/qa/cephfs/clusters/1a5s-mds-1c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, mds.e, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, mds.d, mds.f, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/3-mds.yaml b/qa/cephfs/clusters/3-mds.yaml new file mode 100644 index 000000000..8dfacb4e7 --- /dev/null +++ b/qa/cephfs/clusters/3-mds.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mon.c, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.x, mds.b, mds.c, osd.4, osd.5, osd.6, osd.7] +- [client.0, client.1] +overrides: + ceph: + cephfs: + max_mds: 3 +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/9-mds.yaml b/qa/cephfs/clusters/9-mds.yaml new file mode 100644 index 000000000..60b18c251 --- /dev/null +++ b/qa/cephfs/clusters/9-mds.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mon.c, mgr.y, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.x, mds.e, mds.f, mds.g, mds.h, mds.i, osd.4, osd.5, osd.6, osd.7] +- [client.0, client.1] +overrides: + ceph: + cephfs: + max_mds: 9 +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/fixed-2-ucephfs.yaml b/qa/cephfs/clusters/fixed-2-ucephfs.yaml new file mode 100644 index 000000000..dff37a51f --- /dev/null +++ b/qa/cephfs/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/conf/+ b/qa/cephfs/conf/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/conf/+ diff --git a/qa/cephfs/conf/.qa b/qa/cephfs/conf/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/conf/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/conf/client.yaml b/qa/cephfs/conf/client.yaml new file mode 100644 index 000000000..4b37d03b4 --- /dev/null +++ b/qa/cephfs/conf/client.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + client: + client mount timeout: 600 + debug ms: 1 + debug client: 20 + rados mon op timeout: 900 + rados osd op timeout: 900 diff --git a/qa/cephfs/conf/mds.yaml b/qa/cephfs/conf/mds.yaml new file mode 100644 index 000000000..b1c7a5869 --- /dev/null +++ b/qa/cephfs/conf/mds.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + mds: + debug mds: 20 + debug mds balancer: 20 + debug ms: 1 + mds debug frag: true + mds debug scatterstat: true + mds op complaint time: 180 + mds verify scatter: true + osd op complaint time: 180 + rados mon op timeout: 900 + rados osd op timeout: 900 diff --git a/qa/cephfs/conf/mon.yaml b/qa/cephfs/conf/mon.yaml new file mode 100644 index 000000000..eea56004a --- /dev/null +++ b/qa/cephfs/conf/mon.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mon: + mon op complaint time: 120 diff --git a/qa/cephfs/conf/osd.yaml b/qa/cephfs/conf/osd.yaml new file mode 100644 index 000000000..1087202f9 --- /dev/null +++ b/qa/cephfs/conf/osd.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd op complaint time: 180 diff --git a/qa/cephfs/mount/.qa b/qa/cephfs/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/fuse.yaml b/qa/cephfs/mount/fuse.yaml new file mode 100644 index 000000000..9ffb5129d --- /dev/null +++ b/qa/cephfs/mount/fuse.yaml @@ -0,0 +1,16 @@ +teuthology: + postmerge: + - local function is_kupstream() + return false + end + - local function is_kdistro() + return false + end + - local function is_fuse() + return true + end + - local function syntax_version() + return '' + end +tasks: + - ceph-fuse: diff --git a/qa/cephfs/mount/kclient/% b/qa/cephfs/mount/kclient/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/% diff --git a/qa/cephfs/mount/kclient/.qa b/qa/cephfs/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/mount-syntax/$ b/qa/cephfs/mount/kclient/mount-syntax/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/mount-syntax/$ diff --git a/qa/cephfs/mount/kclient/mount-syntax/v1.yaml b/qa/cephfs/mount/kclient/mount-syntax/v1.yaml new file mode 100644 index 000000000..84d5d43b2 --- /dev/null +++ b/qa/cephfs/mount/kclient/mount-syntax/v1.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + syntax: 'v1' diff --git a/qa/cephfs/mount/kclient/mount-syntax/v2.yaml b/qa/cephfs/mount/kclient/mount-syntax/v2.yaml new file mode 100644 index 000000000..ef7d30424 --- /dev/null +++ b/qa/cephfs/mount/kclient/mount-syntax/v2.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + syntax: 'v2' diff --git a/qa/cephfs/mount/kclient/mount.yaml b/qa/cephfs/mount/kclient/mount.yaml new file mode 100644 index 000000000..a130ff9d5 --- /dev/null +++ b/qa/cephfs/mount/kclient/mount.yaml @@ -0,0 +1,16 @@ +teuthology: + postmerge: + - local function is_kupstream() + return yaml.ktype == 'upstream' + end + - local function is_kdistro() + return yaml.ktype == 'distro' + end + - local function is_fuse() + return false + end + - local function syntax_version() + return yaml.overrides.kclient.syntax + end +tasks: +- kclient: diff --git a/qa/cephfs/mount/kclient/overrides/% b/qa/cephfs/mount/kclient/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/% diff --git a/qa/cephfs/mount/kclient/overrides/.qa b/qa/cephfs/mount/kclient/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/.qa b/qa/cephfs/mount/kclient/overrides/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/% b/qa/cephfs/mount/kclient/overrides/distro/stock/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/% diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/.qa b/qa/cephfs/mount/kclient/overrides/distro/stock/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/k-stock.yaml b/qa/cephfs/mount/kclient/overrides/distro/stock/k-stock.yaml new file mode 100644 index 000000000..ca2d688bb --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/k-stock.yaml @@ -0,0 +1,4 @@ +kernel: + client: + sha1: distro +ktype: distro diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/rhel_8.yaml b/qa/cephfs/mount/kclient/overrides/distro/stock/rhel_8.yaml new file mode 120000 index 000000000..133acf27b --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/testing/.qa b/qa/cephfs/mount/kclient/overrides/distro/testing/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/testing/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml new file mode 100644 index 000000000..2ee219125 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml @@ -0,0 +1,4 @@ +kernel: + client: + branch: testing +ktype: upstream diff --git a/qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml b/qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml new file mode 100644 index 000000000..30da870b2 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/cephfs/objectstore-ec/.qa b/qa/cephfs/objectstore-ec/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/objectstore-ec/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml b/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml new file mode 120000 index 000000000..9fb86b9fe --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/cephfs/objectstore-ec/bluestore-comp-ec-root.yaml b/qa/cephfs/objectstore-ec/bluestore-comp-ec-root.yaml new file mode 100644 index 000000000..512eb117c --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-comp-ec-root.yaml @@ -0,0 +1,29 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + cephfs: + ec_profile: + - m=2 + - k=2 + - crush-failure-domain=osd + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + diff --git a/qa/cephfs/objectstore-ec/bluestore-comp.yaml b/qa/cephfs/objectstore-ec/bluestore-comp.yaml new file mode 100644 index 000000000..b408032fd --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-comp.yaml @@ -0,0 +1,23 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/cephfs/objectstore-ec/bluestore-ec-root.yaml b/qa/cephfs/objectstore-ec/bluestore-ec-root.yaml new file mode 100644 index 000000000..b89c4c711 --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-ec-root.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + cephfs: + ec_profile: + - m=2 + - k=2 + - crush-failure-domain=osd + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/cephfs/overrides/.qa b/qa/cephfs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/overrides/frag.yaml b/qa/cephfs/overrides/frag.yaml new file mode 100644 index 000000000..f05b3f48f --- /dev/null +++ b/qa/cephfs/overrides/frag.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + mds: + mds bal fragment size max: 10000 + mds bal merge size: 5 + mds bal split bits: 3 + mds bal split size: 100 + diff --git a/qa/cephfs/overrides/fuse/.qa b/qa/cephfs/overrides/fuse/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/overrides/fuse/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/overrides/fuse/default-perm/% b/qa/cephfs/overrides/fuse/default-perm/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/% diff --git a/qa/cephfs/overrides/fuse/default-perm/.qa b/qa/cephfs/overrides/fuse/default-perm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/overrides/fuse/default-perm/no.yaml b/qa/cephfs/overrides/fuse/default-perm/no.yaml new file mode 100644 index 000000000..445e93652 --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/no.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + fuse default permissions: false diff --git a/qa/cephfs/overrides/fuse/default-perm/yes.yaml b/qa/cephfs/overrides/fuse/default-perm/yes.yaml new file mode 100644 index 000000000..2fd210a39 --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/yes.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + fuse default permissions: true diff --git a/qa/cephfs/overrides/ignorelist_health.yaml b/qa/cephfs/overrides/ignorelist_health.yaml new file mode 100644 index 000000000..d8b819288 --- /dev/null +++ b/qa/cephfs/overrides/ignorelist_health.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - \(POOL_APP_NOT_ENABLED\) diff --git a/qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 100644 index 000000000..41ba84f04 --- /dev/null +++ b/qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSD_DOWN\) + - \(OSD_ + - but it is still running +# MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a' + - is not responding diff --git a/qa/cephfs/overrides/osd-asserts.yaml b/qa/cephfs/overrides/osd-asserts.yaml new file mode 100644 index 000000000..8c16e6e1c --- /dev/null +++ b/qa/cephfs/overrides/osd-asserts.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/cephfs/overrides/prefetch_entire_dirfrags/no.yaml b/qa/cephfs/overrides/prefetch_entire_dirfrags/no.yaml new file mode 100644 index 000000000..71d6d73ba --- /dev/null +++ b/qa/cephfs/overrides/prefetch_entire_dirfrags/no.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds_dir_prefetch: false diff --git a/qa/cephfs/overrides/prefetch_entire_dirfrags/yes.yaml b/qa/cephfs/overrides/prefetch_entire_dirfrags/yes.yaml new file mode 100644 index 000000000..5d25b61d1 --- /dev/null +++ b/qa/cephfs/overrides/prefetch_entire_dirfrags/yes.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds_dir_prefetch: true diff --git a/qa/cephfs/overrides/session_timeout.yaml b/qa/cephfs/overrides/session_timeout.yaml new file mode 100644 index 000000000..a7a163337 --- /dev/null +++ b/qa/cephfs/overrides/session_timeout.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + session_timeout: 300 diff --git a/qa/cephfs/tasks/.qa b/qa/cephfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml new file mode 100644 index 000000000..2d370d7ef --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1,9 @@ +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml new file mode 100644 index 000000000..41b2bc8ed --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..6a2b35a18 --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..bae220292 --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/fsstress.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml b/qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml new file mode 100644 index 000000000..e51542022 --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - fs/misc/trivial_sync.sh diff --git a/qa/cephfs/tasks/libcephfs_interface_tests.yaml b/qa/cephfs/tasks/libcephfs_interface_tests.yaml new file mode 100644 index 000000000..c59775259 --- /dev/null +++ b/qa/cephfs/tasks/libcephfs_interface_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph-fuse: + disabled: true + kclient: + disabled: true +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/cephfs/unshare_ns_mount.sh b/qa/cephfs/unshare_ns_mount.sh new file mode 100755 index 000000000..88ac3e933 --- /dev/null +++ b/qa/cephfs/unshare_ns_mount.sh @@ -0,0 +1,594 @@ +#!/usr/bin/env bash + +# This is one helper for mounting the ceph-fuse/kernel clients by +# unsharing the network namespace, let's call it netns container. +# With the netns container, you can easily suspend or resume the +# virtual network interface to simulate the client node hard +# shutdown for some test cases. +# +# netnsX netnsY netnsZ +# -------------- -------------- -------------- +# | mount client | | mount client | | mount client | +# | default | ... | default | ... | default | +# |192.168.0.1/16| |192.168.0.2/16| |192.168.0.3/16| +# | veth0 | | veth0 | | veth0 | +# -------------- -------------- ------------- +# | | | +# \ | brx.Y / +# \ ---------------------- / +# \ brx.X | ceph-brx | brx.Z / +# \------>| default |<------/ +# | | 192.168.255.254/16 | | +# | ---------------------- | +# (suspend/resume) | (suspend/resume) +# ----------- +# | Physical | +# | A.B.C.D/M | +# ----------- +# +# Defaultly it will use the 192.168.X.Y/16 private network IPs for +# the ceph-brx and netnses as above. 
And you can also specify your +# own new ip/mask for the ceph-brx, like: +# +# $ unshare_ns_mount.sh --fuse /mnt/cephfs --brxip 172.19.100.100/12 +# +# Then the each netns will get a new ip from the ranges: +# [172.16.0.1 ~ 172.19.100.99]/12 and [172.19.100.101 ~ 172.31.255.254]/12 + +usage() { + echo "" + echo "This will help to isolate the network namespace from OS for the mount client!" + echo "" + echo "usage: unshare_ns_mount.sh [OPTIONS [paramters]] [--brxip <ip_address/mask>]" + echo "OPTIONS:" + echo -e " --fuse <ceph-fuse options>" + echo -e "\tThe ceph-fuse command options" + echo -e "\t $ unshare_ns_mount.sh --fuse -m 192.168.0.1:6789 /mnt/cephfs -o nonempty" + echo "" + echo -e " --kernel <mount options>" + echo -e "\tThe mount command options" + echo -e "\t $ unshare_ns_mount.sh --kernel -t ceph 192.168.0.1:6789:/ /mnt/cephfs -o fs=a" + echo "" + echo -e " --suspend <mountpoint>" + echo -e "\tDown the veth interface in the network namespace" + echo -e "\t $ unshare_ns_mount.sh --suspend /mnt/cephfs" + echo "" + echo -e " --resume <mountpoint>" + echo -e "\tUp the veth interface in the network namespace" + echo -e "\t $ unshare_ns_mount.sh --resume /mnt/cephfs" + echo "" + echo -e " --umount <mountpoint>" + echo -e "\tUmount and delete the network namespace" + echo -e "\t $ unshare_ns_mount.sh --umount /mnt/cephfs" + echo "" + echo -e " --brxip <ip_address/mask>" + echo -e "\tSpecify ip/mask for ceph-brx and it only makes sense for --fuse/--kernel options" + echo -e "\t(default: 192.168.255.254/16, netns ip: 192.168.0.1/16 ~ 192.168.255.253/16)" + echo -e "\t $ unshare_ns_mount.sh --fuse -m 192.168.0.1:6789 /mnt/cephfs --brxip 172.19.255.254/12" + echo -e "\t $ unshare_ns_mount.sh --kernel 192.168.0.1:6789:/ /mnt/cephfs --brxip 172.19.255.254/12" + echo "" + echo -e " -h, --help" + echo -e "\tPrint help" + echo "" +} + +CEPH_BRX=ceph-brx +CEPH_BRX_IP_DEF=192.168.255.254 +NET_MASK_DEF=16 +BRD_DEF=192.168.255.255 + +CEPH_BRX_IP=$CEPH_BRX_IP_DEF +NET_MASK=$NET_MASK_DEF +BRD=$BRD_DEF + +mountpoint="" +new_netns="" +fuse_type=false + +function get_mountpoint() { + for param in $@ + do + if [ -d $param ]; then + # skipping "--client_mountpoint/-r root_directory" + # option for ceph-fuse command + if [ "$last" == "-r" -o "$last" == "--client_mountpoint" ]; then + last=$param + continue + fi + if [ "0$mountpoint" != "0" ]; then + echo "Oops: too many mountpiont options!" + exit 1 + fi + mountpoint=$param + fi + last=$param + done + + if [ "0$mountpoint" == "0" ]; then + echo "Oops: mountpoint path is not a directory or no mountpoint specified!" 
+ exit 1 + fi +} + +function get_new_netns() { + # prune the repeating slashes: + # "/mnt///cephfs///" --> "/mnt/cephfs/" + __mountpoint=`echo "$mountpoint" | sed 's/\/\+/\//g'` + + # prune the leading slashes + while [ ${__mountpoint:0:1} == "/" ] + do + __mountpoint=${__mountpoint:1} + done + + # prune the last slashes + while [ ${__mountpoint: -1} == "/" ] + do + __mountpoint=${__mountpoint:0:-1} + done + + # replace '/' with '-' + __mountpoint=${__mountpoint//\//-} + + # "mnt/cephfs" --> "ceph-fuse-mnt-cephfs" + if [ "$1" == "--fuse" ]; then + new_netns=`echo ceph-fuse-$__mountpoint` + fuse_type=true + return + fi + + # "mnt/cephfs" --> "ceph-kernel-mnt-cephfs" + if [ "$1" == "--kernel" ]; then + new_netns=`echo ceph-kernel-$__mountpoint` + return + fi + + # we are in umount/suspend/resume routines + for ns in `ip netns list | awk '{print $1}'` + do + if [ "$ns" == "ceph-fuse-$__mountpoint" ]; then + new_netns=$ns + fuse_type=true + return + fi + if [ "$ns" == "ceph-kernel-$__mountpoint" ]; then + new_netns=$ns + return + fi + done + + if [ "0$new_netns" == "0" ]; then + echo "Oops, netns 'ceph-{fuse/kernel}-$__mountpoint' does not exists!" + exit 1 + fi +} + +# the peer veth name will be "brx.$nsid" on host node +function get_netns_brx() { + get_new_netns + + nsid=`ip netns list-id | grep "$new_netns" | awk '{print $2}'` + netns_veth=brx.$nsid + eval $1="$netns_veth" +} + +function suspend_netns_veth() { + get_mountpoint $@ + + get_netns_brx brx + ip link set $brx down + exit 0 +} + +function resume_netns_veth() { + get_mountpoint $@ + + get_netns_brx brx + ip link set $brx up + exit 0 +} + +# help and usage +if [ $# == 0 -o "$1" == "-h" -o "$1" == "--help" ]; then + usage + exit 0 +fi + +# suspend the veth from network namespace +if [ $1 == "--suspend" ]; then + suspend_netns_veth $@ + exit 0 +fi + +# resume the veth from network namespace +if [ $1 == "--resume" ]; then + resume_netns_veth $@ + exit 0 +fi + +function ceph_umount() { + get_mountpoint $@ + get_new_netns + + if [ $fuse_type == true ]; then + nsenter --net=/var/run/netns/$new_netns fusermount -u $mountpoint 2>/dev/null + else + nsenter --net=/var/run/netns/$new_netns umount $mountpoint 2>/dev/null + fi + + # let's wait for a while to let the umount operation + # to finish before deleting the netns + while [ 1 ] + do + for pid in `ip netns pids $new_netns 2>/dev/null` + do + name=`cat /proc/$pid/comm 2>/dev/null` + if [ "$name" == "ceph-fuse" ]; then + break + fi + done + + if [ "$name" == "ceph-fuse" ]; then + name="" + usleep 100000 + continue + fi + + break + done + + nsid=`ip netns list-id | grep "$new_netns" | awk '{print $2}'` + netns_brx=brx.$nsid + + # brctl delif $CEPH_BRX $netns_brx 2>/dev/null + nmcli connection down $netns_brx down 2>/dev/null + nmcli connection delete $netns_brx 2>/dev/null + + ip netns delete $new_netns 2>/dev/null + + # if this is the last netns_brx, will delete + # the $CEPH_BRX and restore the OS configure + # rc=`brctl show ceph-brx 2>/dev/null | grep 'brx\.'|wc -l` + rc=`nmcli connection show 2>/dev/null | grep 'brx\.' | wc -l` + if [ $rc == 0 ]; then + ip link set $CEPH_BRX down 2>/dev/null + # brctl delbr $CEPH_BRX 2>/dev/null + nmcli connection delete $CEPH_BRX 2>/dev/null + + # restore the ip forward + tmpfile=`ls /tmp/ | grep "$CEPH_BRX\."` + tmpfile=/tmp/$tmpfile + if [ ! -f $tmpfile ]; then + echo "Oops, the $CEPH_BRX.XXX temp file does not exist!" 
+ else + save=`cat $tmpfile` + echo $save > /proc/sys/net/ipv4/ip_forward + rm -rf $tmpfile + fi + + # drop the iptables NAT rules + host_nic=`route | grep default | awk '{print $8}'` + iptables -D FORWARD -o $host_nic -i $CEPH_BRX -j ACCEPT + iptables -D FORWARD -i $host_nic -o $CEPH_BRX -j ACCEPT + iptables -t nat -D POSTROUTING -s $CEPH_BRX_IP/$NET_MASK -o $host_nic -j MASQUERADE + fi +} + +function get_brd_mask() { + first=`echo "$CEPH_BRX_IP" | awk -F. '{print $1}'` + second=`echo "$CEPH_BRX_IP" | awk -F. '{print $2}'` + third=`echo "$CEPH_BRX_IP" | awk -F. '{print $3}'` + fourth=`echo "$CEPH_BRX_IP" | awk -F. '{print $4}'` + + if [ "$first" == "172" ]; then + second_max=31 + else + second_max=255 + fi + third_max=255 + fourth_max=255 + + if [ $NET_MASK -lt 16 ]; then + let power=16-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + second=$((second&~m)) + let second_max=$second+$m + elif [ $NET_MASK -lt 24 ]; then + let power=24-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + third=$((third&~m)) + let third_max=$third+$m + second_max=$second + elif [ $NET_MASK -lt 32 ]; then + let power=32-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + fourth=$((fourth&~m)) + let fourth_max=$fourth+$m + second_max=$second + third_max=$third + fi + + BRD=$first.$second_max.$third_max.$fourth_max +} + +# As default: +# The netns IP will be 192.168.0.1 ~ 192.168.255.253, +# and 192.168.255.254 is saved for $CEPH_BRX +function get_new_ns_ip() { + first=`echo "$CEPH_BRX_IP" | awk -F. '{print $1}'` + second=`echo "$CEPH_BRX_IP" | awk -F. '{print $2}'` + third=`echo "$CEPH_BRX_IP" | awk -F. '{print $3}'` + fourth=`echo "$CEPH_BRX_IP" | awk -F. '{print $4}'` + + if [ "$first" == ""172 ]; then + second_max=31 + else + second_max=255 + fi + third_max=255 + fourth_max=254 + + if [ $NET_MASK -lt 16 ]; then + let power=16-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + second=$((second&~m)) + let second_max=$second+$m + third=0 + fourth=1 + elif [ $NET_MASK -lt 24 ]; then + let power=24-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + third=$((third&~m)) + let third_max=$third+$m + second_max=$second + fourth=1 + elif [ $NET_MASK -lt 32 ]; then + let power=32-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + fourth=$((fourth&~m)) + let fourth+=1 + let fourth_max=$fourth+$m-1 + second_max=$second + third_max=$third + fi + + while [ $second -le $second_max -a $third -le $third_max -a $fourth -le $fourth_max ] + do + conflict=false + + # check from the existing network namespaces + for netns in `ip netns list | awk '{print $1}'` + do + ip=`ip netns exec $netns ip addr | grep "inet " | grep "veth0"` + ip=`echo "$ip" | awk '{print $2}' | awk -F/ '{print $1}'` + if [ "0$ip" == "0" ]; then + continue + fi + if [ "$first.$second.$third.$fourth" == "$ip" ]; then + conflict=true + + let fourth+=1 + if [ $fourth -le $fourth_max ]; then + break + fi + + fourth=0 + let third+=1 + if [ $third -le $third_max ]; then + break + fi + + third=0 + let second+=1 + if [ $second -le $second_max ]; then + break + fi + + echo "Oops: we have ran out of the ip addresses!" + exit 1 + fi + done + + # have we found one ? + if [ $conflict == false ]; then + break + fi + done + + ip=$first.$second.$third.$fourth + max=$first.$second_max.$third_max.$fourth_max + if [ "$ip" == "$max" ]; then + echo "Oops: we have ran out of the ip addresses!" + exit 1 + fi + + eval $1="$ip" +} + +function check_valid_private_ip() { + first=`echo "$1" | awk -F. '{print $1}'` + second=`echo "$1" | awk -F. 
'{print $2}'` + + # private network class A 10.0.0.0 - 10.255.255.255 + if [ "$first" == "10" -a $NET_MASK -ge 8 ]; then + return + fi + + # private network class B 172.16.0.0 - 172.31.255.255 + if [ "$first" == "172" -a $second -ge 16 -a $second -le 31 -a $NET_MASK -ge 12 ]; then + return + fi + + # private network class C 192.168.0.0 - 192.168.255.255 + if [ "$first" == "192" -a "$second" == "168" -a $NET_MASK -ge 16 ]; then + return + fi + + echo "Oops: invalid private ip address '$CEPH_BRX_IP/$NET_MASK'!" + exit 1 +} + +function setup_bridge_and_nat() { + # check and parse the --brxip parameter + is_brxip=false + for ip in $@ + do + if [ "$ip" == "--brxip" ]; then + is_brxip=true + continue + fi + if [ $is_brxip == true ]; then + new_brxip=$ip + break + fi + done + + # if the $CEPH_BRX already exists, then check the new + # brxip, if not match fail it without doing anything. + rc=`ip addr | grep "inet " | grep " $CEPH_BRX"` + if [ "0$rc" != "0" ]; then + existing_brxip=`echo "$rc" | awk '{print $2}'` + if [ "0$new_brxip" != "0" -a "$existing_brxip" != "$new_brxip" ]; then + echo "Oops: conflict with the existing $CEPH_BRX ip '$existing_brxip', new '$new_brxip'!" + exit 1 + fi + + CEPH_BRX_IP=`echo "$existing_brxip" | awk -F/ '{print $1}'` + NET_MASK=`echo "$existing_brxip" | awk -F/ '{print $2}'` + get_brd_mask + return + fi + + # if it is the first time to run the the script or there + # is no any network namespace exists, we need to setup + # the $CEPH_BRX, if no --brxip is specified will use the + # default $CEPH_BRX_IP/$NET_MASK + if [ "0$new_brxip" != "0" ]; then + CEPH_BRX_IP=`echo "$new_brxip" | awk -F/ '{print $1}'` + NET_MASK=`echo "$new_brxip" | awk -F/ '{print $2}'` + get_brd_mask + check_valid_private_ip $CEPH_BRX_IP + fi + + # brctl addbr $CEPH_BRX + nmcli connection add type bridge con-name $CEPH_BRX ifname $CEPH_BRX stp no + # ip link set $CEPH_BRX up + # ip addr add $CEPH_BRX_IP/$NET_MASK brd $BRD dev $CEPH_BRX + nmcli connection modify $CEPH_BRX ipv4.addresses $CEPH_BRX_IP/$NET_MASK ipv4.method manual + nmcli connection up $CEPH_BRX + + # setup the NAT + rm -rf /tmp/ceph-brx.* + tmpfile=$(mktemp /tmp/ceph-brx.XXXXXXXX) + save=`cat /proc/sys/net/ipv4/ip_forward` + echo $save > $tmpfile + echo 1 > /proc/sys/net/ipv4/ip_forward + + host_nic=`route | grep default | awk '{print $8}'` + iptables -A FORWARD -o $host_nic -i $CEPH_BRX -j ACCEPT + iptables -A FORWARD -i $host_nic -o $CEPH_BRX -j ACCEPT + iptables -t nat -A POSTROUTING -s $CEPH_BRX_IP/$NET_MASK -o $host_nic -j MASQUERADE +} + +function __ceph_mount() { + # for some options like the '-t' in mount command + # the nsenter command will take over it, so it is + # hard to pass it direct to the netns. + # here we will create one temp file with x mode + tmpfile=$(mktemp /tmp/ceph-nsenter.XXXXXXXX) + chmod +x $tmpfile + if [ "$1" == "--kernel" ]; then + cmd=`echo "$@" | sed 's/--kernel/mount/'` + else + cmd=`echo "$@" | sed 's/--fuse/ceph-fuse/'` + fi + + # remove the --brxip parameter + cmd=`echo "$cmd" | sed 's/--brxip.*\/[0-9]* //'` + + # enter $new_netns and run ceph fuse client mount, + # we couldn't use 'ip netns exec' here because it + # will unshare the mount namespace. + echo "$cmd" > $tmpfile + nsenter --net=/var/run/netns/$new_netns /bin/bash $tmpfile ; echo $? 
> $tmpfile + rc=`cat $tmpfile` + rm -f $tmpfile + + # fall back + if [ $rc != 0 ]; then + m=$mountpoint + mountpoint="" + ceph_umount $m + fi +} + +function get_new_nsid() { + # get one uniq netns id + uniq_id=0 + while [ 1 ] + do + rc=`ip netns list-id | grep "nsid $uniq_id "` + if [ "0$rc" == "0" ]; then + break + fi + let uniq_id+=1 + done + + eval $1="$uniq_id" +} + +function ceph_mount() { + get_mountpoint $@ + setup_bridge_and_nat $@ + + get_new_netns $1 + rc=`ip netns list | grep "$new_netns" | awk '{print $1}'` + if [ "0$rc" != "0" ]; then + echo "Oops: the netns "$new_netns" already exists!" + exit 1 + fi + + get_new_nsid new_nsid + + # create a new network namespace + ip netns add $new_netns + ip netns set $new_netns $new_nsid + + get_new_ns_ip ns_ip + if [ 0"$ns_ip" == "0" ]; then + echo "Oops: there is no ip address could be used any more!" + exit 1 + fi + + # veth interface in netns + ns_veth=veth0 + netns_brx=brx.$new_nsid + + # setup veth interfaces + ip link add $ns_veth netns $new_netns type veth peer name $netns_brx + ip netns exec $new_netns ip addr add $ns_ip/$NET_MASK brd $BRD dev $ns_veth + ip netns exec $new_netns ip link set $ns_veth up + ip netns exec $new_netns ip link set lo up + ip netns exec $new_netns ip route add default via $CEPH_BRX_IP + + # bring up the bridge interface and join it to $CEPH_BRX + # brctl addif $CEPH_BRX $netns_brx + nmcli connection add type bridge-slave con-name $netns_brx ifname $netns_brx master $CEPH_BRX + nmcli connection up $netns_brx + # ip link set $netns_brx up + + __ceph_mount $@ +} + +if [ "$1" == "--umount" ]; then + ceph_umount $@ + exit 0 +fi + +# mount in the netns +if [ "$1" != "--kernel" -a "$1" != "--fuse" ]; then + echo "Oops: invalid mount options '$1'!" + exit 1 +fi + +ceph_mount $@ diff --git a/qa/client/30_subdir_mount.sh b/qa/client/30_subdir_mount.sh new file mode 100755 index 000000000..0bdf2ed1a --- /dev/null +++ b/qa/client/30_subdir_mount.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -x + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +. $basedir/common.sh + +client_mount +mkdir -p $mnt/sub +echo sub > $mnt/sub/file +client_umount + +mkdir -p $mnt/1 +mkdir -p $mnt/2 +/bin/mount -t ceph $monhost:/sub $mnt/1 +grep sub $mnt/1/file + +/bin/mount -t ceph $monhost:/ $mnt/2 +grep sub $mnt/2/sub/file + +/bin/umount $mnt/1 +grep sub $mnt/2/sub/file + +/bin/umount $mnt/2 diff --git a/qa/client/common.sh b/qa/client/common.sh new file mode 100644 index 000000000..d06368e6e --- /dev/null +++ b/qa/client/common.sh @@ -0,0 +1,58 @@ + +# defaults +[ -z "$bindir" ] && bindir=$PWD # location of init-ceph +[ -z "$conf" ] && conf="$basedir/ceph.conf" +[ -z "$mnt" ] && mnt="/c" +[ -z "$monhost" ] && monhost="cosd0" + +set -e + +mydir=`hostname`_`echo $0 | sed 's/\//_/g'` + +client_mount() +{ + /bin/mount -t ceph $monhost:/ $mnt +} + +client_umount() +{ + /bin/umount $mnt + # look for VFS complaints + if dmesg | tail -n 50 | grep -c "VFS: Busy inodes" ; then + echo "looks like we left inodes pinned" + exit 1 + fi +} + +ceph_start() +{ + $bindir/init-ceph -c $conf start ${1} +} + +ceph_stop() +{ + $bindir/init-ceph -c $conf stop ${1} +} + +ceph_restart() +{ + $bindir/init-ceph -c $conf restart ${1} +} + +ceph_command() +{ + $bindir/ceph -c $conf $* +} + +client_enter_mydir() +{ + pushd . 
+ test -d $mnt/$mydir && rm -r $mnt/$mydir + mkdir $mnt/$mydir + cd $mnt/$mydir +} + +client_leave_mydir() +{ + popd +} diff --git a/qa/client/gen-1774.sh b/qa/client/gen-1774.sh new file mode 100644 index 000000000..3ee5bc90d --- /dev/null +++ b/qa/client/gen-1774.sh @@ -0,0 +1,2068 @@ +#!/usr/bin/env bash +set -e + +mount () { :; } +umount () { :; } + +list="\ +abiword.control +abiword.list +abiword-plugin-latex.control +abiword-plugin-latex.list +abiword-plugin-opendocument.control +abiword-plugin-opendocument.list +abiword-plugin-openxml.control +abiword-plugin-openxml.list +abiword-plugin-pdf.control +abiword-plugin-pdf.list +abiword-plugin-wikipedia.control +abiword-plugin-wikipedia.list +abiword.postinst +aceofpenguins.control +aceofpenguins-launcher.control +aceofpenguins-launcher.list +aceofpenguins.list +aceofpenguins.postinst +alsa-conf-base.control +alsa-conf-base.list +alsa-scenarii-shr.conffiles +alsa-scenarii-shr.control +alsa-scenarii-shr.list +alsa-utils-alsactl.control +alsa-utils-alsactl.list +alsa-utils-alsamixer.control +alsa-utils-alsamixer.list +alsa-utils-amixer.control +alsa-utils-amixer.list +alsa-utils-aplay.control +alsa-utils-aplay.list +angstrom-libc-fixup-hack.control +angstrom-libc-fixup-hack.list +angstrom-libc-fixup-hack.postinst +apmd.control +apmd.list +apmd.postinst +apmd.postrm +apmd.prerm +aspell.control +aspell.list +atd-over-fso.control +atd-over-fso.list +atd-over-fso.postinst +atd-over-fso.postrm +atd-over-fso.prerm +base-files.conffiles +base-files.control +base-files.list +base-passwd.control +base-passwd.list +base-passwd.postinst +bash.control +bash.list +bash.postinst +bluez4.control +bluez4.list +bluez4.postinst +bluez4.postrm +bluez4.prerm +boost-signals.control +boost-signals.list +boost-signals.postinst +busybox.control +busybox.list +busybox-mountall.control +busybox-mountall.list +busybox-mountall.postinst +busybox-mountall.prerm +busybox.postinst +busybox.prerm +busybox-syslog.conffiles +busybox-syslog.control +busybox-syslog.list +busybox-syslog.postinst +busybox-syslog.postrm +busybox-syslog.prerm +ca-certificates.conffiles +ca-certificates.control +ca-certificates.list +ca-certificates.postinst +calc.control +calc.list +connman.control +connman.list +connman-plugin-udhcp.control +connman-plugin-udhcp.list +connman-plugin-wifi.control +connman-plugin-wifi.list +connman.postinst +connman.postrm +connman.prerm +connman-scripts.control +connman-scripts.list +cpio.control +cpio.list +cpio.postinst +cpio.prerm +cpp.control +cpp.list +cpp-symlinks.control +cpp-symlinks.list +cron.control +cron.list +cron.postinst +cron.postrm +cron.prerm +curl.control +curl.list +dbus.conffiles +dbus.control +dbus-daemon-proxy.control +dbus-daemon-proxy.list +dbus-hlid.control +dbus-hlid.list +dbus.list +dbus.postinst +dbus.postrm +dbus.prerm +dbus-x11.control +dbus-x11.list +devmem2.control +devmem2.list +distro-feed-configs.conffiles +distro-feed-configs.control +distro-feed-configs.list +dosfstools.control +dosfstools.list +e2fsprogs-badblocks.control +e2fsprogs-badblocks.list +e2fsprogs.control +e2fsprogs-e2fsck.control +e2fsprogs-e2fsck.list +e2fsprogs-e2fsck.postinst +e2fsprogs-e2fsck.prerm +e2fsprogs.list +e2fsprogs-mke2fs.control +e2fsprogs-mke2fs.list +e2fsprogs-mke2fs.postinst +e2fsprogs-mke2fs.prerm +e2fsprogs.postinst +e2fsprogs.prerm +ecore-con.control +ecore-con.list +ecore-con.postinst +ecore.control +ecore-evas.control +ecore-evas.list +ecore-evas.postinst +ecore-fb.control +ecore-fb.list +ecore-fb.postinst +ecore-file.control 
+ecore-file.list +ecore-file.postinst +ecore-imf.control +ecore-imf-evas.control +ecore-imf-evas.list +ecore-imf-evas.postinst +ecore-imf.list +ecore-imf.postinst +ecore-input.control +ecore-input.list +ecore-input.postinst +ecore-ipc.control +ecore-ipc.list +ecore-ipc.postinst +ecore.list +ecore.postinst +ecore-x.control +ecore-x.list +ecore-x.postinst +edbus.control +edbus.list +edbus.postinst +edje.control +edje.list +edje.postinst +edje-utils.control +edje-utils.list +efreet.control +efreet.list +efreet.postinst +eggdbus.control +eggdbus.list +eggdbus.postinst +eglibc-binary-localedata-en-us.control +eglibc-binary-localedata-en-us.list +eglibc-charmap-utf-8.control +eglibc-charmap-utf-8.list +eglibc-gconv.control +eglibc-gconv-cp1252.control +eglibc-gconv-cp1252.list +eglibc-gconv-ibm850.control +eglibc-gconv-ibm850.list +eglibc-gconv-iso8859-15.control +eglibc-gconv-iso8859-15.list +eglibc-gconv-iso8859-1.control +eglibc-gconv-iso8859-1.list +eglibc-gconv.list +eglibc-localedata-i18n.control +eglibc-localedata-i18n.list +eglibc-localedata-iso14651-t1-common.control +eglibc-localedata-iso14651-t1-common.list +eglibc-localedata-iso14651-t1.control +eglibc-localedata-iso14651-t1.list +eglibc-localedata-translit-circle.control +eglibc-localedata-translit-circle.list +eglibc-localedata-translit-cjk-compat.control +eglibc-localedata-translit-cjk-compat.list +eglibc-localedata-translit-compat.control +eglibc-localedata-translit-compat.list +eglibc-localedata-translit-font.control +eglibc-localedata-translit-font.list +eglibc-localedata-translit-fraction.control +eglibc-localedata-translit-fraction.list +eglibc-localedata-translit-narrow.control +eglibc-localedata-translit-narrow.list +eglibc-localedata-translit-neutral.control +eglibc-localedata-translit-neutral.list +eglibc-localedata-translit-small.control +eglibc-localedata-translit-small.list +eglibc-localedata-translit-wide.control +eglibc-localedata-translit-wide.list +eglibc-utils.control +eglibc-utils.list +eina.control +eina.list +eina.postinst +eject.control +eject.list +elementary-theme-gry.control +elementary-theme-gry.list +emacs-x11.control +emacs-x11.list +embryo.control +embryo.list +embryo.postinst +embryo-tests.control +embryo-tests.list +enchant.control +enchant.list +enchant.postinst +epdfview.control +epdfview.list +espeak.control +espeak.list +espeak.postinst +evas.control +evas-engine-buffer.control +evas-engine-buffer.list +evas-engine-fb.control +evas-engine-fb.list +evas-engine-software-16.control +evas-engine-software-16.list +evas-engine-software-16-x11.control +evas-engine-software-16-x11.list +evas-engine-software-generic.control +evas-engine-software-generic.list +evas-engine-software-x11.control +evas-engine-software-x11.list +evas-engine-xrender-x11.control +evas-engine-xrender-x11.list +evas.list +evas-loader-eet.control +evas-loader-eet.list +evas-loader-jpeg.control +evas-loader-jpeg.list +evas-loader-png.control +evas-loader-png.list +evas.postinst +evas-saver-eet.control +evas-saver-eet.list +evas-saver-jpeg.control +evas-saver-jpeg.list +evas-saver-png.control +evas-saver-png.list +evtest.control +evtest.list +e-wm-config-default.control +e-wm-config-default.list +e-wm-config-illume2-shr.control +e-wm-config-illume2-shr.list +e-wm-config-illume-shr.control +e-wm-config-illume-shr.list +e-wm.control +e-wm-icons.control +e-wm-icons.list +e-wm-images.control +e-wm-images.list +e-wm-input-methods.control +e-wm-input-methods.list +e-wm.list +e-wm-menu-shr.control +e-wm-menu-shr.list +e-wm-other.control 
+e-wm-other.list +e-wm.postinst +e-wm.postrm +e-wm-sysactions-shr.control +e-wm-sysactions-shr.list +e-wm-theme-default.control +e-wm-theme-default.list +e-wm-theme-illume-gry.control +e-wm-theme-illume-gry.list +e-wm-theme-illume-shr.control +e-wm-theme-illume-shr.list +e-wm-utils.control +e-wm-utils.list +fbreader.control +fbreader.list +fbreader.postinst +fbset.control +fbset.list +fbset-modes.conffiles +fbset-modes.control +fbset-modes.list +fbset.postinst +fbset.postrm +ffalarms.control +ffalarms.list +file.control +file.list +file.postinst +findutils.control +findutils.list +findutils.postinst +findutils.prerm +flac.control +flac.list +flite.control +flite.list +fontconfig-utils.control +fontconfig-utils.list +font-update-common.control +font-update-common.list +frameworkd-config-shr.conffiles +frameworkd-config-shr.control +frameworkd-config-shr.list +frameworkd.control +frameworkd.list +frameworkd.postinst +frameworkd.postrm +frameworkd.prerm +fso-abyss-config.conffiles +fso-abyss-config.control +fso-abyss-config.list +fso-abyss.control +fso-abyss.list +fso-apm.control +fso-apm.list +fsodatad.control +fsodatad.list +fsodatad.postinst +fsodeviced.control +fsodeviced.list +fsodeviced.postinst +fsodeviced.postrm +fsodeviced.prerm +fso-gpsd.control +fso-gpsd.list +fso-gpsd.postinst +fso-gpsd.postrm +fso-gpsd.prerm +fsogsmd.control +fsogsmd.list +fsogsmd.postinst +fsonetworkd.control +fsonetworkd.list +fsonetworkd.postinst +fsoraw.control +fsoraw.list +fsotdld.control +fsotdld.list +fsotdld.postinst +fsousaged.control +fsousaged.list +fsousaged.postinst +gcc.control +gcc.list +gconf.control +gconf.list +gconf.postinst +g++.control +gdb.control +gdb.list +gdk-pixbuf-loader-gif.control +gdk-pixbuf-loader-gif.list +gdk-pixbuf-loader-gif.postinst +gdk-pixbuf-loader-jpeg.control +gdk-pixbuf-loader-jpeg.list +gdk-pixbuf-loader-jpeg.postinst +gdk-pixbuf-loader-png.control +gdk-pixbuf-loader-png.list +gdk-pixbuf-loader-png.postinst +gdk-pixbuf-loader-xpm.control +gdk-pixbuf-loader-xpm.list +gdk-pixbuf-loader-xpm.postinst +git.control +git.list +g++.list +gnome-pty-helper.control +gnome-pty-helper.list +gnome-vfs.control +gnome-vfs.list +gnome-vfs-plugin-file.control +gnome-vfs-plugin-file.list +gnome-vfs.postinst +gnome-vfs.prerm +gnupg.control +gnupg.list +gpe-icons.control +gpe-icons.list +gpe-icons.postinst +gpe-icons.postrm +gpe-scap.control +gpe-scap.list +gpe-sketchbook.control +gpe-sketchbook.list +gpgv.control +gpgv.list +gridpad.control +gridpad.list +gst-plugin-alsa.control +gst-plugin-alsa.list +gst-plugin-audioconvert.control +gst-plugin-audioconvert.list +gst-plugin-autodetect.control +gst-plugin-autodetect.list +gst-plugin-gconfelements.control +gst-plugin-gconfelements.list +gst-plugin-gconfelements.postinst +gst-plugin-gconfelements.prerm +gst-plugin-mad.control +gst-plugin-mad.list +gstreamer.control +gstreamer.list +gstreamer.postinst +gtk+.control +gtk+.list +gtk+.postinst +hal.control +hal-info.control +hal-info.list +hal.list +hal.postinst +hal.postrm +hdparm.control +hdparm.list +hdparm.postinst +hdparm.prerm +hicolor-icon-theme.control +hicolor-icon-theme.list +hicolor-icon-theme.postinst +hicolor-icon-theme.postrm +htop.control +htop.list +i2c-tools.control +i2c-tools.list +id3lib.control +id3lib.list +id3lib.postinst +iliwi.control +iliwi.list +illume-keyboard-default-alpha.control +illume-keyboard-default-alpha.list +illume-keyboard-default-terminal.control +illume-keyboard-default-terminal.list +illume-keyboard-numeric-alt.control +illume-keyboard-numeric-alt.list 
+imagemagick.control +imagemagick.list +imagemagick.postinst +initscripts-shr.control +initscripts-shr.list +intone.control +intone.list +iptables.control +iptables.list +iptables.postinst +kernel-2.6.29-rc3.control +kernel-2.6.29-rc3.list +kernel.control +kernel-image-2.6.29-rc3.control +kernel-image-2.6.29-rc3.list +kernel-image-2.6.29-rc3.postinst +kernel.list +kernel-module-ar6000.control +kernel-module-ar6000.list +kernel-module-ar6000.postinst +kernel-module-ar6000.postrm +kernel-module-arc4.control +kernel-module-arc4.list +kernel-module-arc4.postinst +kernel-module-arc4.postrm +kernel-module-asix.control +kernel-module-asix.list +kernel-module-asix.postinst +kernel-module-asix.postrm +kernel-module-bluetooth.control +kernel-module-bluetooth.list +kernel-module-bluetooth.postinst +kernel-module-bluetooth.postrm +kernel-module-bnep.control +kernel-module-bnep.list +kernel-module-bnep.postinst +kernel-module-bnep.postrm +kernel-module-btusb.control +kernel-module-btusb.list +kernel-module-btusb.postinst +kernel-module-btusb.postrm +kernel-module-crc-ccitt.control +kernel-module-crc-ccitt.list +kernel-module-crc-ccitt.postinst +kernel-module-crc-ccitt.postrm +kernel-module-ecb.control +kernel-module-ecb.list +kernel-module-ecb.postinst +kernel-module-ecb.postrm +kernel-module-exportfs.control +kernel-module-exportfs.list +kernel-module-exportfs.postinst +kernel-module-exportfs.postrm +kernel-module-gadgetfs.control +kernel-module-gadgetfs.list +kernel-module-gadgetfs.postinst +kernel-module-gadgetfs.postrm +kernel-module-g-ether.control +kernel-module-g-ether.list +kernel-module-g-ether.postinst +kernel-module-g-ether.postrm +kernel-module-g-file-storage.control +kernel-module-g-file-storage.list +kernel-module-g-file-storage.postinst +kernel-module-g-file-storage.postrm +kernel-module-g-serial.control +kernel-module-g-serial.list +kernel-module-g-serial.postinst +kernel-module-g-serial.postrm +kernel-module-hidp.control +kernel-module-hidp.list +kernel-module-hidp.postinst +kernel-module-hidp.postrm +kernel-module-iptable-filter.control +kernel-module-iptable-filter.list +kernel-module-iptable-filter.postinst +kernel-module-iptable-filter.postrm +kernel-module-iptable-nat.control +kernel-module-iptable-nat.list +kernel-module-iptable-nat.postinst +kernel-module-iptable-nat.postrm +kernel-module-ip-tables.control +kernel-module-ip-tables.list +kernel-module-ip-tables.postinst +kernel-module-ip-tables.postrm +kernel-module-ipt-masquerade.control +kernel-module-ipt-masquerade.list +kernel-module-ipt-masquerade.postinst +kernel-module-ipt-masquerade.postrm +kernel-module-l2cap.control +kernel-module-l2cap.list +kernel-module-l2cap.postinst +kernel-module-l2cap.postrm +kernel-module-lockd.control +kernel-module-lockd.list +kernel-module-lockd.postinst +kernel-module-lockd.postrm +kernel-module-michael-mic.control +kernel-module-michael-mic.list +kernel-module-michael-mic.postinst +kernel-module-michael-mic.postrm +kernel-module-nf-conntrack.control +kernel-module-nf-conntrack-ipv4.control +kernel-module-nf-conntrack-ipv4.list +kernel-module-nf-conntrack-ipv4.postinst +kernel-module-nf-conntrack-ipv4.postrm +kernel-module-nf-conntrack.list +kernel-module-nf-conntrack.postinst +kernel-module-nf-conntrack.postrm +kernel-module-nf-defrag-ipv4.control +kernel-module-nf-defrag-ipv4.list +kernel-module-nf-defrag-ipv4.postinst +kernel-module-nf-defrag-ipv4.postrm +kernel-module-nf-nat.control +kernel-module-nf-nat.list +kernel-module-nf-nat.postinst +kernel-module-nf-nat.postrm 
+kernel-module-nfs-acl.control +kernel-module-nfs-acl.list +kernel-module-nfs-acl.postinst +kernel-module-nfs-acl.postrm +kernel-module-nfsd.control +kernel-module-nfsd.list +kernel-module-nfsd.postinst +kernel-module-nfsd.postrm +kernel-module-nls-utf8.control +kernel-module-nls-utf8.list +kernel-module-nls-utf8.postinst +kernel-module-nls-utf8.postrm +kernel-module-ohci-hcd.control +kernel-module-ohci-hcd.list +kernel-module-ohci-hcd.postinst +kernel-module-ohci-hcd.postrm +kernel-module-pegasus.control +kernel-module-pegasus.list +kernel-module-pegasus.postinst +kernel-module-pegasus.postrm +kernel-module-ppp-async.control +kernel-module-ppp-async.list +kernel-module-ppp-async.postinst +kernel-module-ppp-async.postrm +kernel-module-ppp-deflate.control +kernel-module-ppp-deflate.list +kernel-module-ppp-deflate.postinst +kernel-module-ppp-deflate.postrm +kernel-module-ppp-generic.control +kernel-module-ppp-generic.list +kernel-module-ppp-generic.postinst +kernel-module-ppp-generic.postrm +kernel-module-ppp-mppe.control +kernel-module-ppp-mppe.list +kernel-module-ppp-mppe.postinst +kernel-module-ppp-mppe.postrm +kernel-module-rfcomm.control +kernel-module-rfcomm.list +kernel-module-rfcomm.postinst +kernel-module-rfcomm.postrm +kernel-module-s3cmci.control +kernel-module-s3cmci.list +kernel-module-s3cmci.postinst +kernel-module-s3cmci.postrm +kernel-module-sco.control +kernel-module-sco.list +kernel-module-sco.postinst +kernel-module-sco.postrm +kernel-module-scsi-mod.control +kernel-module-scsi-mod.list +kernel-module-scsi-mod.postinst +kernel-module-scsi-mod.postrm +kernel-module-sd-mod.control +kernel-module-sd-mod.list +kernel-module-sd-mod.postinst +kernel-module-sd-mod.postrm +kernel-module-slhc.control +kernel-module-slhc.list +kernel-module-slhc.postinst +kernel-module-slhc.postrm +kernel-module-snd.control +kernel-module-snd.list +kernel-module-snd-page-alloc.control +kernel-module-snd-page-alloc.list +kernel-module-snd-page-alloc.postinst +kernel-module-snd-page-alloc.postrm +kernel-module-snd-pcm.control +kernel-module-snd-pcm.list +kernel-module-snd-pcm.postinst +kernel-module-snd-pcm.postrm +kernel-module-snd.postinst +kernel-module-snd.postrm +kernel-module-snd-soc-core.control +kernel-module-snd-soc-core.list +kernel-module-snd-soc-core.postinst +kernel-module-snd-soc-core.postrm +kernel-module-snd-soc-neo1973-gta02-wm8753.control +kernel-module-snd-soc-neo1973-gta02-wm8753.list +kernel-module-snd-soc-neo1973-gta02-wm8753.postinst +kernel-module-snd-soc-neo1973-gta02-wm8753.postrm +kernel-module-snd-soc-s3c24xx.control +kernel-module-snd-soc-s3c24xx-i2s.control +kernel-module-snd-soc-s3c24xx-i2s.list +kernel-module-snd-soc-s3c24xx-i2s.postinst +kernel-module-snd-soc-s3c24xx-i2s.postrm +kernel-module-snd-soc-s3c24xx.list +kernel-module-snd-soc-s3c24xx.postinst +kernel-module-snd-soc-s3c24xx.postrm +kernel-module-snd-soc-wm8753.control +kernel-module-snd-soc-wm8753.list +kernel-module-snd-soc-wm8753.postinst +kernel-module-snd-soc-wm8753.postrm +kernel-module-snd-timer.control +kernel-module-snd-timer.list +kernel-module-snd-timer.postinst +kernel-module-snd-timer.postrm +kernel-module-sunrpc.control +kernel-module-sunrpc.list +kernel-module-sunrpc.postinst +kernel-module-sunrpc.postrm +kernel-module-tun.control +kernel-module-tun.list +kernel-module-tun.postinst +kernel-module-tun.postrm +kernel-module-uinput.control +kernel-module-uinput.list +kernel-module-uinput.postinst +kernel-module-uinput.postrm +kernel-module-usbserial.control +kernel-module-usbserial.list 
+kernel-module-usbserial.postinst +kernel-module-usbserial.postrm +kernel-module-usb-storage.control +kernel-module-usb-storage.list +kernel-module-usb-storage.postinst +kernel-module-usb-storage.postrm +kernel-module-x-tables.control +kernel-module-x-tables.list +kernel-module-x-tables.postinst +kernel-module-x-tables.postrm +kernel.postinst +kernel.postrm +lame.control +lame.list +liba52-0.control +liba52-0.list +liba52-0.postinst +libacl1.control +libacl1.list +libacl1.postinst +libapm1.control +libapm1.list +libapm1.postinst +libasound2.control +libasound2.list +libasound2.postinst +libaspell15.control +libaspell15.list +libaspell15.postinst +libatk-1.0-0.control +libatk-1.0-0.list +libatk-1.0-0.postinst +libattr1.control +libattr1.list +libattr1.postinst +libavahi-client3.control +libavahi-client3.list +libavahi-client3.postinst +libavahi-common3.control +libavahi-common3.list +libavahi-common3.postinst +libavahi-glib1.control +libavahi-glib1.list +libavahi-glib1.postinst +libavcodec52.control +libavcodec52.list +libavcodec52.postinst +libavformat52.control +libavformat52.list +libavformat52.postinst +libavutil50.control +libavutil50.list +libavutil50.postinst +libblkid1.control +libblkid1.list +libblkid1.postinst +libbz2-1.control +libbz2-1.list +libbz2-1.postinst +libc6.control +libc6.list +libc6.postinst +libcairo2.control +libcairo2.list +libcairo2.postinst +libcanberra0.control +libcanberra0.list +libcanberra0.postinst +libcanberra-alsa.control +libcanberra-alsa.list +libcom-err2.control +libcom-err2.list +libcom-err2.postinst +libcroco.control +libcroco.list +libcroco.postinst +libcrypto0.9.8.control +libcrypto0.9.8.list +libcrypto0.9.8.postinst +libcups2.control +libcups2.list +libcups2.postinst +libcurl4.control +libcurl4.list +libcurl4.postinst +libdbus-1-3.control +libdbus-1-3.list +libdbus-1-3.postinst +libdbus-glib-1-2.control +libdbus-glib-1-2.list +libdbus-glib-1-2.postinst +libdmx1.control +libdmx1.list +libdmx1.postinst +libdrm.control +libdrm.list +libdrm.postinst +libdvdcss2.control +libdvdcss2.list +libdvdcss2.postinst +libdvdread3.control +libdvdread3.list +libdvdread3.postinst +libeet1.control +libeet1.list +libeet1.postinst +libelementary-ver-pre-svn-05-0.control +libelementary-ver-pre-svn-05-0.list +libelementary-ver-pre-svn-05-0.postinst +libelementary-ver-pre-svn-05-themes.control +libelementary-ver-pre-svn-05-themes.list +libelf0.control +libelf0.list +libelf0.postinst +libewebkit0.control +libewebkit0.list +libewebkit0.postinst +libexif12.control +libexif12.list +libexif12.postinst +libexosip2.control +libexosip2.list +libexosip2.postinst +libexpat1.control +libexpat1.list +libexpat1.postinst +libfaac0.control +libfaac0.list +libfaac0.postinst +libfakekey0.control +libfakekey0.list +libfakekey0.postinst +libffi5.control +libffi5.list +libffi5.postinst +libflac8.control +libflac8.list +libflac8.postinst +libfontconfig1.control +libfontconfig1.list +libfontconfig1.postinst +libfontenc1.control +libfontenc1.list +libfontenc1.postinst +libframeworkd-glib0.control +libframeworkd-glib0.list +libframeworkd-glib0.postinst +libfreetype6.control +libfreetype6.list +libfreetype6.postinst +libfribidi0.control +libfribidi0.list +libfribidi0.postinst +libfsobasics0.control +libfsobasics0.list +libfsobasics0.postinst +libfsoframework0.control +libfsoframework0.list +libfsoframework0.postinst +libfso-glib0.control +libfso-glib0.list +libfso-glib0.postinst +libfsoresource0.control +libfsoresource0.list +libfsoresource0.postinst +libfsotransport0.control 
+libfsotransport0.list +libfsotransport0.postinst +libgcc1.control +libgcc1.list +libgcc1.postinst +libgcrypt11.control +libgcrypt11.list +libgcrypt11.postinst +libgee2.control +libgee2.list +libgee2.postinst +libgio-2.0-0.control +libgio-2.0-0.list +libgio-2.0-0.postinst +libgl1.control +libgl1.list +libgl1.postinst +libglade-2.0-0.control +libglade-2.0-0.list +libglade-2.0-0.postinst +libglib-2.0-0.control +libglib-2.0-0.list +libglib-2.0-0.postinst +libglu1.control +libglu1.list +libglu1.postinst +libgmodule-2.0-0.control +libgmodule-2.0-0.list +libgmodule-2.0-0.postinst +libgmp3.control +libgmp3.list +libgmp3.postinst +libgnt0.control +libgnt0.list +libgnt0.postinst +libgnutls26.control +libgnutls26.list +libgnutls26.postinst +libgnutls-extra26.control +libgnutls-extra26.list +libgnutls-extra26.postinst +libgobject-2.0-0.control +libgobject-2.0-0.list +libgobject-2.0-0.postinst +libgoffice-0.8-8.control +libgoffice-0.8-8.list +libgoffice-0.8-8.postinst +libgoffice-0.8-plugin-plot-barcol.control +libgoffice-0.8-plugin-plot-barcol.list +libgoffice-0.8-plugin-plot-distrib.control +libgoffice-0.8-plugin-plot-distrib.list +libgoffice-0.8-plugin-plot-pie.control +libgoffice-0.8-plugin-plot-pie.list +libgoffice-0.8-plugin-plot-radar.control +libgoffice-0.8-plugin-plot-radar.list +libgoffice-0.8-plugin-plot-surface.control +libgoffice-0.8-plugin-plot-surface.list +libgoffice-0.8-plugin-plot-xy.control +libgoffice-0.8-plugin-plot-xy.list +libgoffice-0.8-plugin-reg-linear.control +libgoffice-0.8-plugin-reg-linear.list +libgoffice-0.8-plugin-reg-logfit.control +libgoffice-0.8-plugin-reg-logfit.list +libgoffice-0.8-plugin-smoothing.control +libgoffice-0.8-plugin-smoothing.list +libgpewidget1.control +libgpewidget1.list +libgpewidget1.postinst +libgpg-error0.control +libgpg-error0.list +libgpg-error0.postinst +libgpgme11.control +libgpgme11.list +libgpgme11.postinst +libgsf.control +libgsf.list +libgsf.postinst +libgsf.prerm +libgsm0710-0.control +libgsm0710-0.list +libgsm0710-0.postinst +libgsm0710mux0.control +libgsm0710mux0.list +libgsm0710mux0.postinst +libgsm1.control +libgsm1.list +libgsm1.postinst +libgstaudio-0.10-0.control +libgstaudio-0.10-0.list +libgstaudio-0.10-0.postinst +libgstfarsight-0.10-0.control +libgstfarsight-0.10-0.list +libgstfarsight-0.10-0.postinst +libgstinterfaces-0.10-0.control +libgstinterfaces-0.10-0.list +libgstinterfaces-0.10-0.postinst +libgstnetbuffer-0.10-0.control +libgstnetbuffer-0.10-0.list +libgstnetbuffer-0.10-0.postinst +libgstpbutils-0.10-0.control +libgstpbutils-0.10-0.list +libgstpbutils-0.10-0.postinst +libgstrtp-0.10-0.control +libgstrtp-0.10-0.list +libgstrtp-0.10-0.postinst +libgsttag-0.10-0.control +libgsttag-0.10-0.list +libgsttag-0.10-0.postinst +libgstvideo-0.10-0.control +libgstvideo-0.10-0.list +libgstvideo-0.10-0.postinst +libgthread-2.0-0.control +libgthread-2.0-0.list +libgthread-2.0-0.postinst +libgypsy0.control +libgypsy0.list +libgypsy0.postinst +libical.control +libical.list +libical.postinst +libice6.control +libice6.list +libice6.postinst +libicudata36.control +libicudata36.list +libicudata36.postinst +libicui18n36.control +libicui18n36.list +libicui18n36.postinst +libicuuc36.control +libicuuc36.list +libicuuc36.postinst +libid3tag0.control +libid3tag0.list +libid3tag0.postinst +libidl-2-0.control +libidl-2-0.list +libidl-2-0.postinst +libidn.control +libidn.list +libidn.postinst +libimlib2-1.control +libimlib2-1.list +libimlib2-1.postinst +libjasper1.control +libjasper1.list +libjasper1.postinst +libjpeg62.control +libjpeg62.list 
+libjpeg62.postinst +liblinebreak1.control +liblinebreak1.list +liblinebreak1.postinst +liblinphone3.control +liblinphone3.list +liblinphone3.postinst +liblockfile.control +liblockfile.list +liblockfile.postinst +libltdl7.control +libltdl7.list +libltdl7.postinst +liblzo1.control +liblzo1.list +liblzo1.postinst +libmad0.control +libmad0.list +libmad0.postinst +libmediastreamer0.control +libmediastreamer0.list +libmediastreamer0.postinst +libmp3lame0.control +libmp3lame0.list +libmp3lame0.postinst +libmpfr1.control +libmpfr1.list +libmpfr1.postinst +libnice.control +libnice.list +libnice.postinst +libnl2.control +libnl2.list +libnl2.postinst +libnl-genl2.control +libnl-genl2.list +libnl-genl2.postinst +libnl-nf2.control +libnl-nf2.list +libnl-nf2.postinst +libnl-route2.control +libnl-route2.list +libnl-route2.postinst +libode0.control +libode0.list +libode0.postinst +libogg0.control +libogg0.list +libogg0.postinst +liboil.control +liboil.list +liboil.postinst +libopkg0.control +libopkg0.list +libopkg0.postinst +libortp8.control +libortp8.list +libortp8.postinst +libosip2-3.control +libosip2-3.list +libosip2-3.postinst +libpam-base-files.control +libpam-base-files.list +libpam.control +libpam.list +libpam-meta.control +libpam-meta.list +libpam.postinst +libpcap.control +libpcap.list +libpcap.postinst +libpciaccess0.control +libpciaccess0.list +libpciaccess0.postinst +libperl5.control +libperl5.list +libperl5.postinst +libphone-ui0.conffiles +libphone-ui0.control +libphone-ui0.list +libphone-ui0.postinst +libphone-ui-shr.control +libphone-ui-shr.list +libphone-utils0.conffiles +libphone-utils0.control +libphone-utils0.list +libphone-utils0.postinst +libpixman-1-0.control +libpixman-1-0.list +libpixman-1-0.postinst +libpng12-0.control +libpng12-0.list +libpng12-0.postinst +libpng.control +libpng.list +libpoppler5.control +libpoppler5.list +libpoppler5.postinst +libpoppler-glib4.control +libpoppler-glib4.list +libpoppler-glib4.postinst +libpopt0.control +libpopt0.list +libpopt0.postinst +libportaudio2.control +libportaudio2.list +libportaudio2.postinst +libpostproc51.control +libpostproc51.list +libpostproc51.postinst +libpthread-stubs0.control +libpthread-stubs0.list +libpthread-stubs0.postinst +libpurple.control +libpurple.list +libpurple-plugin-ssl.control +libpurple-plugin-ssl-gnutls.control +libpurple-plugin-ssl-gnutls.list +libpurple-plugin-ssl.list +libpurple.postinst +libpurple.prerm +libpurple-protocol-icq.control +libpurple-protocol-icq.list +libpurple-protocol-irc.control +libpurple-protocol-irc.list +libpurple-protocol-msn.control +libpurple-protocol-msn.list +libpurple-protocol-xmpp.control +libpurple-protocol-xmpp.list +libpyglib-2.0-python0.control +libpyglib-2.0-python0.list +libpyglib-2.0-python0.postinst +libpython2.6-1.0.control +libpython2.6-1.0.list +libpython2.6-1.0.postinst +libreadline5.control +libreadline5.list +libreadline5.postinst +librsvg-2-2.control +librsvg-2-2.list +librsvg-2-2.postinst +librsvg-2-gtk.control +librsvg-2-gtk.list +librsvg-2-gtk.postinst +libschroedinger-1.0-0.control +libschroedinger-1.0-0.list +libschroedinger-1.0-0.postinst +libsdl-1.2-0.control +libsdl-1.2-0.list +libsdl-1.2-0.postinst +libsdl-image-1.2-0.control +libsdl-image-1.2-0.list +libsdl-image-1.2-0.postinst +libsdl-mixer-1.2-0.control +libsdl-mixer-1.2-0.list +libsdl-mixer-1.2-0.postinst +libsdl-ttf-2.0-0.control +libsdl-ttf-2.0-0.list +libsdl-ttf-2.0-0.postinst +libsm6.control +libsm6.list +libsm6.postinst +libsoup-2.2-8.control +libsoup-2.2-8.list +libsoup-2.2-8.postinst 
+libsoup-2.4-1.control +libsoup-2.4-1.list +libsoup-2.4-1.postinst +libspeex1.control +libspeex1.list +libspeex1.postinst +libspeexdsp1.control +libspeexdsp1.list +libspeexdsp1.postinst +libsqlite0.control +libsqlite0.list +libsqlite0.postinst +libsqlite3-0.control +libsqlite3-0.list +libsqlite3-0.postinst +libss2.control +libss2.list +libss2.postinst +libssl0.9.8.control +libssl0.9.8.list +libssl0.9.8.postinst +libstartup-notification-1-0.control +libstartup-notification-1-0.list +libstartup-notification-1-0.postinst +libstdc++6.control +libstdc++6.list +libstdc++6.postinst +libswscale0.control +libswscale0.list +libswscale0.postinst +libsysfs2.control +libsysfs2.list +libsysfs2.postinst +libtheora0.control +libtheora0.list +libtheora0.postinst +libthread-db1.control +libthread-db1.list +libthread-db1.postinst +libtiff5.control +libtiff5.list +libtiff5.postinst +libts-1.0-0.control +libts-1.0-0.list +libts-1.0-0.postinst +libungif4.control +libungif4.list +libungif4.postinst +libusb-0.1-4.control +libusb-0.1-4.list +libusb-0.1-4.postinst +libuuid1.control +libuuid1.list +libuuid1.postinst +libvorbis0.control +libvorbis0.list +libvorbis0.postinst +libvte9.control +libvte9.list +libvte9.postinst +libwebkit-1.0-2.control +libwebkit-1.0-2.list +libwebkit-1.0-2.postinst +libwrap0.control +libwrap0.list +libwrap0.postinst +libx11-6.control +libx11-6.list +libx11-6.postinst +libx11-locale.control +libx11-locale.list +libxau6.control +libxau6.list +libxau6.postinst +libxaw7-7.control +libxaw7-7.list +libxaw7-7.postinst +libxcalibrate0.control +libxcalibrate0.list +libxcalibrate0.postinst +libxcomposite1.control +libxcomposite1.list +libxcomposite1.postinst +libxcursor1.control +libxcursor1.list +libxcursor1.postinst +libxdamage1.control +libxdamage1.list +libxdamage1.postinst +libxdmcp6.control +libxdmcp6.list +libxdmcp6.postinst +libxext6.control +libxext6.list +libxext6.postinst +libxfixes3.control +libxfixes3.list +libxfixes3.postinst +libxfont1.control +libxfont1.list +libxfont1.postinst +libxfontcache1.control +libxfontcache1.list +libxfontcache1.postinst +libxft2.control +libxft2.list +libxft2.postinst +libxi6.control +libxi6.list +libxi6.postinst +libxinerama1.control +libxinerama1.list +libxinerama1.postinst +libxkbfile1.control +libxkbfile1.list +libxkbfile1.postinst +libxml2.control +libxml2.list +libxml2.postinst +libxmu6.control +libxmu6.list +libxmu6.postinst +libxmuu1.control +libxmuu1.list +libxmuu1.postinst +libxp6.control +libxp6.list +libxp6.postinst +libxpm4.control +libxpm4.list +libxpm4.postinst +libxrandr2.control +libxrandr2.list +libxrandr2.postinst +libxrender1.control +libxrender1.list +libxrender1.postinst +libxslt.control +libxslt.list +libxslt.postinst +libxss1.control +libxss1.list +libxss1.postinst +libxt6.control +libxt6.list +libxt6.postinst +libxtst6.control +libxtst6.list +libxtst6.postinst +libxv1.control +libxv1.list +libxv1.postinst +libxxf86dga1.control +libxxf86dga1.list +libxxf86dga1.postinst +libxxf86misc1.control +libxxf86misc1.list +libxxf86misc1.postinst +libxxf86vm1.control +libxxf86vm1.list +libxxf86vm1.postinst +libyaml-0-2.control +libyaml-0-2.list +libyaml-0-2.postinst +libz1.control +libz1.list +libz1.postinst +linphone.control +linphone.list +locale-base-en-us.control +locale-base-en-us.list +logrotate.conffiles +logrotate.control +logrotate.list +logrotate.postinst +logrotate.postrm +lsof.control +lsof.list +ltrace.control +ltrace.list +make.control +make.list +matchbox-keyboard-im.control +matchbox-keyboard-im.list 
+matchbox-keyboard-im.postinst +matchbox-keyboard-im.postrm +mbuffer.control +mbuffer.list +mdbus2.control +mdbus2.list +mesa-dri.control +mesa-dri.list +mesa-dri.postinst +mime-support.control +mime-support.list +mioctl.control +mioctl.list +mkdump.control +mkdump.list +mobile-broadband-provider-info.control +mobile-broadband-provider-info.list +module-init-tools.control +module-init-tools-depmod.control +module-init-tools-depmod.list +module-init-tools-depmod.postinst +module-init-tools-depmod.prerm +module-init-tools.list +module-init-tools.postinst +module-init-tools.prerm +modutils-initscripts.control +modutils-initscripts.list +modutils-initscripts.postinst +modutils-initscripts.postrm +modutils-initscripts.prerm +mokomaze.control +mokomaze.list +mplayer-common.control +mplayer-common.list +mplayer.conffiles +mplayer.control +mplayer.list +mtd-utils.control +mtd-utils.list +mterm2.control +mterm2.list +nano.control +nano.list +navit.conffiles +navit.control +navit-icons.control +navit-icons.list +navit.list +ncurses.control +ncurses.list +ncurses.postinst +netbase.conffiles +netbase.control +netbase.list +netbase.postinst +netbase.postrm +netbase.prerm +nfs-utils-client.control +nfs-utils-client.list +nmon.control +nmon.list +numptyphysics.control +numptyphysics.list +openssh.control +openssh-keygen.control +openssh-keygen.list +openssh.list +openssh-scp.control +openssh-scp.list +openssh-scp.postinst +openssh-scp.postrm +openssh-sftp-server.control +openssh-sftp-server.list +openssh-ssh.conffiles +openssh-ssh.control +openssh-sshd.conffiles +openssh-sshd.control +openssh-sshd.list +openssh-sshd.postinst +openssh-sshd.postrm +openssh-ssh.list +openssh-ssh.postinst +openssh-ssh.postrm +openssl.control +openssl.list +openvpn.control +openvpn.list +opimd-utils-cli.control +opimd-utils-cli.list +opimd-utils-data.control +opimd-utils-data.list +opimd-utils-notes.control +opimd-utils-notes.list +opkg-collateral.conffiles +opkg-collateral.control +opkg-collateral.list +opkg.control +opkg.list +opkg.postinst +opkg.postrm +orbit2.control +orbit2.list +orbit2.postinst +pam-plugin-access.control +pam-plugin-access.list +pam-plugin-debug.control +pam-plugin-debug.list +pam-plugin-deny.control +pam-plugin-deny.list +pam-plugin-echo.control +pam-plugin-echo.list +pam-plugin-env.control +pam-plugin-env.list +pam-plugin-exec.control +pam-plugin-exec.list +pam-plugin-faildelay.control +pam-plugin-faildelay.list +pam-plugin-filter.control +pam-plugin-filter.list +pam-plugin-ftp.control +pam-plugin-ftp.list +pam-plugin-group.control +pam-plugin-group.list +pam-plugin-issue.control +pam-plugin-issue.list +pam-plugin-keyinit.control +pam-plugin-keyinit.list +pam-plugin-lastlog.control +pam-plugin-lastlog.list +pam-plugin-limits.control +pam-plugin-limits.list +pam-plugin-listfile.control +pam-plugin-listfile.list +pam-plugin-localuser.control +pam-plugin-localuser.list +pam-plugin-loginuid.control +pam-plugin-loginuid.list +pam-plugin-mail.control +pam-plugin-mail.list +pam-plugin-mkhomedir.control +pam-plugin-mkhomedir.list +pam-plugin-motd.control +pam-plugin-motd.list +pam-plugin-namespace.control +pam-plugin-namespace.list +pam-plugin-nologin.control +pam-plugin-nologin.list +pam-plugin-permit.control +pam-plugin-permit.list +pam-plugin-pwhistory.control +pam-plugin-pwhistory.list +pam-plugin-rhosts.control +pam-plugin-rhosts.list +pam-plugin-rootok.control +pam-plugin-rootok.list +pam-plugin-securetty.control +pam-plugin-securetty.list +pam-plugin-shells.control +pam-plugin-shells.list 
+pam-plugin-stress.control +pam-plugin-stress.list +pam-plugin-succeed-if.control +pam-plugin-succeed-if.list +pam-plugin-tally2.control +pam-plugin-tally2.list +pam-plugin-tally.control +pam-plugin-tally.list +pam-plugin-time.control +pam-plugin-time.list +pam-plugin-timestamp.control +pam-plugin-timestamp.list +pam-plugin-umask.control +pam-plugin-umask.list +pam-plugin-unix.control +pam-plugin-unix.list +pam-plugin-warn.control +pam-plugin-warn.list +pam-plugin-wheel.control +pam-plugin-wheel.list +pam-plugin-xauth.control +pam-plugin-xauth.list +pango.control +pango.list +pango-module-basic-fc.control +pango-module-basic-fc.list +pango-module-basic-fc.postinst +pango-module-basic-x.control +pango-module-basic-x.list +pango-module-basic-x.postinst +pango.postinst +perl.control +perl.list +perl-module-carp.control +perl-module-carp.list +perl-module-exporter.control +perl-module-exporter.list +perl-module-file-basename.control +perl-module-file-basename.list +perl-module-file-path.control +perl-module-file-path.list +perl-module-strict.control +perl-module-strict.list +perl-module-warnings.control +perl-module-warnings.list +phonefsod.conffiles +phonefsod.control +phonefsod.list +phonefsod.postinst +phonefsod.postrm +phonefsod.prerm +phoneui-apps-contacts.control +phoneui-apps-contacts.list +phoneui-apps-dialer.control +phoneui-apps-dialer.list +phoneui-apps-messages.control +phoneui-apps-messages.list +phoneui-apps-quick-settings.control +phoneui-apps-quick-settings.list +phoneuid.conffiles +phoneuid.control +phoneuid.list +pidgin.control +pidgin-data.control +pidgin-data.list +pidgin.list +pingus.control +pingus.list +pointercal.control +pointercal.list +policykit.control +policykit.list +policykit.postinst +policykit.postrm +poppler-data.control +poppler-data.list +portmap.control +portmap.list +portmap.postinst +portmap.postrm +portmap.prerm +powertop.control +powertop.list +ppp.conffiles +ppp.control +ppp-dialin.control +ppp-dialin.list +ppp-dialin.postinst +ppp-dialin.postrm +ppp.list +ppp.postinst +procps.conffiles +procps.control +procps.list +procps.postinst +procps.postrm +procps.prerm +pth.control +pth.list +pth.postinst +pxaregs.control +pxaregs.list +pyefl-sudoku.control +pyefl-sudoku.list +pyphonelog.control +pyphonelog.list +python-codecs.control +python-codecs.list +python-core.control +python-core.list +python-crypt.control +python-crypt.list +python-ctypes.control +python-ctypes.list +python-datetime.control +python-datetime.list +python-dateutil.control +python-dateutil.list +python-dbus.control +python-dbus.list +python-difflib.control +python-difflib.list +python-ecore.control +python-ecore.list +python-edbus.control +python-edbus.list +python-edje.control +python-edje.list +python-elementary.control +python-elementary.list +python-evas.control +python-evas.list +python-fcntl.control +python-fcntl.list +python-gst.control +python-gst.list +python-io.control +python-io.list +python-lang.control +python-lang.list +python-logging.control +python-logging.list +python-math.control +python-math.list +python-multiprocessing.control +python-multiprocessing.list +python-pexpect.control +python-pexpect.list +python-phoneutils.control +python-phoneutils.list +python-pickle.control +python-pickle.list +python-pprint.control +python-pprint.list +python-pyalsaaudio.control +python-pyalsaaudio.list +python-pycairo.control +python-pycairo.list +python-pygobject.control +python-pygobject.list +python-pygtk.control +python-pygtk.list +python-pyrtc.control +python-pyrtc.list 
+python-pyserial.control +python-pyserial.list +python-pyyaml.control +python-pyyaml.list +python-readline.control +python-readline.list +python-re.control +python-re.list +python-resource.control +python-resource.list +python-shell.control +python-shell.list +python-sqlite3.control +python-sqlite3.list +python-stringold.control +python-stringold.list +python-subprocess.control +python-subprocess.list +python-syslog.control +python-syslog.list +python-terminal.control +python-terminal.list +python-textutils.control +python-textutils.list +python-threading.control +python-threading.list +python-vobject.control +python-vobject.list +python-xml.control +python-xml.list +python-zlib.control +python-zlib.list +rgb.control +rgb.list +rsync.control +rsync.list +s3c24xx-gpio.control +s3c24xx-gpio.list +s3c64xx-gpio.control +s3c64xx-gpio.list +screen.control +screen.list +sed.control +sed.list +sed.postinst +sed.prerm +serial-forward.control +serial-forward.list +shared-mime-info.control +shared-mime-info.list +shr-settings-addons-illume.control +shr-settings-addons-illume.list +shr-settings-backup-configuration.conffiles +shr-settings-backup-configuration.control +shr-settings-backup-configuration.list +shr-settings.control +shr-settings.list +shr-splash.control +shr-splash.list +shr-splash.postinst +shr-splash.postrm +shr-splash.prerm +shr-splash-theme-simple.control +shr-splash-theme-simple.list +shr-splash-theme-simple.postinst +shr-splash-theme-simple.postrm +shr-theme.control +shr-theme-gry.control +shr-theme-gry.list +shr-theme-gtk-e17lookalike.control +shr-theme-gtk-e17lookalike.list +shr-theme-gtk-e17lookalike.postinst +shr-theme-gtk-e17lookalike.postrm +shr-theme.list +shr-wizard.control +shr-wizard.list +socat.control +socat.list +strace.control +strace.list +synergy.control +synergy.list +sysfsutils.control +sysfsutils.list +sysstat.control +sysstat.list +sysvinit.control +sysvinit-inittab.conffiles +sysvinit-inittab.control +sysvinit-inittab.list +sysvinit.list +sysvinit-pidof.control +sysvinit-pidof.list +sysvinit-pidof.postinst +sysvinit-pidof.prerm +sysvinit.postinst +sysvinit.postrm +sysvinit.prerm +sysvinit-utils.control +sysvinit-utils.list +sysvinit-utils.postinst +sysvinit-utils.prerm +tangogps.control +tangogps.list +task-base-apm.control +task-base-apm.list +task-base-bluetooth.control +task-base-bluetooth.list +task-base.control +task-base-ext2.control +task-base-ext2.list +task-base-kernel26.control +task-base-kernel26.list +task-base.list +task-base-ppp.control +task-base-ppp.list +task-base-usbgadget.control +task-base-usbgadget.list +task-base-usbhost.control +task-base-usbhost.list +task-base-vfat.control +task-base-vfat.list +task-base-wifi.control +task-base-wifi.list +task-boot.control +task-boot.list +task-cli-tools.control +task-cli-tools-debug.control +task-cli-tools-debug.list +task-cli-tools.list +task-distro-base.control +task-distro-base.list +task-fonts-truetype-core.control +task-fonts-truetype-core.list +task-fso2-compliance.control +task-fso2-compliance.list +task-machine-base.control +task-machine-base.list +task-shr-apps.control +task-shr-apps.list +task-shr-cli.control +task-shr-cli.list +task-shr-games.control +task-shr-games.list +task-shr-gtk.control +task-shr-gtk.list +task-shr-minimal-apps.control +task-shr-minimal-apps.list +task-shr-minimal-audio.control +task-shr-minimal-audio.list +task-shr-minimal-base.control +task-shr-minimal-base.list +task-shr-minimal-cli.control +task-shr-minimal-cli.list +task-shr-minimal-fso.control 
+task-shr-minimal-fso.list +task-shr-minimal-gtk.control +task-shr-minimal-gtk.list +task-shr-minimal-x.control +task-shr-minimal-x.list +task-x11-illume.control +task-x11-illume.list +task-x11-server.control +task-x11-server.list +task-x11-utils.control +task-x11-utils.list +tcpdump.control +tcpdump.list +tinylogin.control +tinylogin.list +tinylogin.postinst +tinylogin.prerm +tslib-calibrate.control +tslib-calibrate.list +tslib-conf.control +tslib-conf.list +ttf-dejavu-common.control +ttf-dejavu-common.list +ttf-dejavu-common.postinst +ttf-dejavu-common.postrm +ttf-dejavu-sans.control +ttf-dejavu-sans.list +ttf-dejavu-sans-mono.control +ttf-dejavu-sans-mono.list +ttf-dejavu-sans-mono.postinst +ttf-dejavu-sans-mono.postrm +ttf-dejavu-sans.postinst +ttf-dejavu-sans.postrm +ttf-liberation-mono.control +ttf-liberation-mono.list +ttf-liberation-mono.postinst +ttf-liberation-mono.postrm +tzdata-africa.control +tzdata-africa.list +tzdata-americas.control +tzdata-americas.list +tzdata-asia.control +tzdata-asia.list +tzdata-australia.control +tzdata-australia.list +tzdata.conffiles +tzdata.control +tzdata-europe.control +tzdata-europe.list +tzdata.list +udev.control +udev.list +udev.postinst +udev.postrm +udev.prerm +udev-utils.control +udev-utils.list +update-modules.control +update-modules.list +update-modules.postinst +update-rc.d.control +update-rc.d.list +usb-gadget-mode.control +usb-gadget-mode.list +usb-gadget-mode.postinst +usb-gadget-mode.postrm +usbutils.control +usbutils.list +util-linux-ng-blkid.control +util-linux-ng-blkid.list +util-linux-ng-blkid.postinst +util-linux-ng-blkid.prerm +util-linux-ng-cfdisk.control +util-linux-ng-cfdisk.list +util-linux-ng.control +util-linux-ng-fdisk.control +util-linux-ng-fdisk.list +util-linux-ng-fdisk.postinst +util-linux-ng-fdisk.prerm +util-linux-ng-fsck.control +util-linux-ng-fsck.list +util-linux-ng-fsck.postinst +util-linux-ng-fsck.prerm +util-linux-ng.list +util-linux-ng-losetup.control +util-linux-ng-losetup.list +util-linux-ng-losetup.postinst +util-linux-ng-losetup.prerm +util-linux-ng-mountall.control +util-linux-ng-mountall.list +util-linux-ng-mountall.postinst +util-linux-ng-mountall.prerm +util-linux-ng-mount.control +util-linux-ng-mount.list +util-linux-ng-mount.postinst +util-linux-ng-mount.prerm +util-linux-ng.postinst +util-linux-ng.prerm +util-linux-ng-readprofile.control +util-linux-ng-readprofile.list +util-linux-ng-readprofile.postinst +util-linux-ng-readprofile.prerm +util-linux-ng-sfdisk.control +util-linux-ng-sfdisk.list +util-linux-ng-swaponoff.control +util-linux-ng-swaponoff.list +util-linux-ng-swaponoff.postinst +util-linux-ng-swaponoff.prerm +util-linux-ng-umount.control +util-linux-ng-umount.list +util-linux-ng-umount.postinst +util-linux-ng-umount.prerm +vagalume.control +vagalume.list +vala-terminal.control +vala-terminal.list +ventura.control +ventura.list +vnc.control +vnc.list +vpnc.conffiles +vpnc.control +vpnc.list +vte-termcap.control +vte-termcap.list +wireless-tools.control +wireless-tools.list +wmiconfig.control +wmiconfig.list +wpa-supplicant.control +wpa-supplicant.list +wpa-supplicant-passphrase.control +wpa-supplicant-passphrase.list +wv.control +wv.list +wv.postinst +x11vnc.control +x11vnc.list +xauth.control +xauth.list +xcursor-transparent-theme.control +xcursor-transparent-theme.list +xdpyinfo.control +xdpyinfo.list +xf86-input-evdev.control +xf86-input-evdev.list +xf86-input-keyboard.control +xf86-input-keyboard.list +xf86-input-mouse.control +xf86-input-mouse.list +xf86-input-tslib.control 
+xf86-input-tslib.list +xf86-video-glamo.control +xf86-video-glamo.list +xhost.control +xhost.list +xinit.control +xinit.list +xinput-calibrator.control +xinput-calibrator.list +xinput.control +xinput.list +xkbcomp.control +xkbcomp.list +xkeyboard-config.control +xkeyboard-config.list +xmodmap.control +xmodmap.list +xorg-minimal-fonts.control +xorg-minimal-fonts.list +xrandr.control +xrandr.list +xserver-kdrive-common.control +xserver-kdrive-common.list +xserver-nodm-init.control +xserver-nodm-init.list +xserver-nodm-init.postinst +xserver-nodm-init.postrm +xserver-nodm-init.prerm +xserver-xorg-conf.conffiles +xserver-xorg-conf.control +xserver-xorg-conf.list +xserver-xorg.control +xserver-xorg-extension-dri2.control +xserver-xorg-extension-dri2.list +xserver-xorg-extension-dri.control +xserver-xorg-extension-dri.list +xserver-xorg-extension-glx.control +xserver-xorg-extension-glx.list +xserver-xorg.list +xset.control +xset.list +xtscal.control +xtscal.list" + +mount /mnt/ceph-fuse +: cd /mnt/ceph-fuse + +mkdir test-1774 +cd test-1774 +for f in $list; do + touch $f +done + +cd +umount /mnt/ceph-fuse +mount /mnt/ceph-fuse +cd - + +# this worked before the 1774 fix +diff <(ls) <(echo "$list") + +# but this failed, because we cached the dirlist wrong +# update-modules.postinst used to be the missing file, +# the last one in the first dirent set passed to ceph-fuse +diff <(ls) <(echo "$list") + +cd .. +rm -rf test-1774 + +cd +umount /mnt/ceph-fuse diff --git a/qa/clusters/2-node-mgr.yaml b/qa/clusters/2-node-mgr.yaml new file mode 100644 index 000000000..b1c29a866 --- /dev/null +++ b/qa/clusters/2-node-mgr.yaml @@ -0,0 +1,10 @@ +roles: +- [mgr.x, mon.a, mon.c, mds.a, mds.c, osd.0, client.0] +- [mgr.y, mgr.z, mon.b, mds.b, osd.1, osd.2, osd.3, client.1] +log-rotate: + ceph-mds: 10G + ceph-osd: 10G +openstack: + - volumes: # attached to each instance + count: 2 + size: 30 # GB diff --git a/qa/clusters/extra-client.yaml b/qa/clusters/extra-client.yaml new file mode 100644 index 000000000..33fa505b7 --- /dev/null +++ b/qa/clusters/extra-client.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true
\ No newline at end of file diff --git a/qa/clusters/fixed-1.yaml b/qa/clusters/fixed-1.yaml new file mode 100644 index 000000000..d8e5898b9 --- /dev/null +++ b/qa/clusters/fixed-1.yaml @@ -0,0 +1,14 @@ +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 + ceph: + conf: + osd: + osd shutdown pgref assert: true +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] diff --git a/qa/clusters/fixed-2.yaml b/qa/clusters/fixed-2.yaml new file mode 100644 index 000000000..e4448bb20 --- /dev/null +++ b/qa/clusters/fixed-2.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0, node-exporter.a] +- [mon.b, mgr.x, osd.4, osd.5, osd.6, osd.7, client.1, prometheus.a, node-exporter.b] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/clusters/fixed-3-cephfs.yaml b/qa/clusters/fixed-3-cephfs.yaml new file mode 100644 index 000000000..9e021b3bd --- /dev/null +++ b/qa/clusters/fixed-3-cephfs.yaml @@ -0,0 +1,16 @@ +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1] +- [mon.b, mds.b, mon.c, mgr.y, osd.2, osd.3] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/clusters/fixed-3.yaml b/qa/clusters/fixed-3.yaml new file mode 100644 index 000000000..ddc79a84b --- /dev/null +++ b/qa/clusters/fixed-3.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mon.c, mgr.x, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/clusters/fixed-4.yaml b/qa/clusters/fixed-4.yaml new file mode 100644 index 000000000..df767f357 --- /dev/null +++ b/qa/clusters/fixed-4.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.y, osd.0, osd.4, osd.8, osd.12] +- [mon.b, osd.1, osd.5, osd.9, osd.13] +- [mon.c, osd.2, osd.6, osd.10, osd.14] +- [mgr.x, osd.3, osd.7, osd.11, osd.15, client.0] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true
\ No newline at end of file diff --git a/qa/config/crimson_qa_overrides.yaml b/qa/config/crimson_qa_overrides.yaml new file mode 100644 index 000000000..670b98bc1 --- /dev/null +++ b/qa/config/crimson_qa_overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: crimson + mon: + osd pool default crimson: true + osd: + crimson seastar smp: 3 + flavor: crimson + workunit: + env: + CRIMSON_COMPAT: '1' diff --git a/qa/config/rados.yaml b/qa/config/rados.yaml new file mode 100644 index 000000000..710847f59 --- /dev/null +++ b/qa/config/rados.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + osd: + osd op queue: debug_random + osd op queue cut off: debug_random + osd debug verify missing on start: true + osd debug verify cached snaps: true + bluestore zero block detection: true + osd mclock override recovery settings: true + osd mclock profile: high_recovery_ops + mon: + mon scrub interval: 300 diff --git a/qa/crontab/teuthology-cronjobs b/qa/crontab/teuthology-cronjobs new file mode 100644 index 000000000..783dcbd78 --- /dev/null +++ b/qa/crontab/teuthology-cronjobs @@ -0,0 +1,143 @@ +PATH=/home/teuthology/src/teuthology_main/virtualenv/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin +TEUTH_CEPH_REPO='https://github.com/ceph/ceph.git' +TEUTH_SUITE_REPO='https://github.com/ceph/ceph.git' +MAILTO="ceph-infra@redhat.com;yweinste@redhat.com" +CEPH_QA_EMAIL="ceph-qa@ceph.io" + +### !!!!!!!!!!!!!!!!!!!!!!!!!! +## THIS CRONTAB MUST NOT BE EDITED MANUALLY !!!! +## AUTOMATED CRONTAB UPDATING +## https://code.google.com/archive/p/chkcrontab/wikis/CheckCrontab.wiki +## https://github.com/ceph/ceph-cm-ansible/pull/391 +## crontab is in https://github.com/ceph/ceph/main/qa/crontab/teuthology-cronjobs +# chkcrontab: disable-msg=INVALID_USER +# chkcrontab: disable-msg=USER_NOT_FOUND +@daily /bin/bash /home/teuthology/bin/update-crontab.sh +### !!!!!!!!!!!!!!!!!!!!!!!!!! + + +# Ensure teuthology is up-to-date +@daily cd /home/teuthology/src/teuthology_main && /home/teuthology/bin/cron_wrapper git pull +@daily cd /home/teuthology/src/git.ceph.com_ceph_main && /home/teuthology/bin/cron_wrapper git pull +# Ensure ceph-sepia-secrets is up-to-date +*/5 * * * * cd /home/teuthology/ceph-sepia-secrets && /home/teuthology/bin/cron_wrapper git pull + + +#Publish this crontab to the Tracker page http://tracker.ceph.com/projects/ceph-releases/wiki/Crontab +@daily crontab=$(crontab -l | perl -p -e 's/</&lt;/g; s/>/&gt;/g; s/&/&amp;/g') ; header=$(echo h3. Crontab ; echo) ; curl --verbose -X PUT --header 'Content-type: application/xml' --data-binary '<?xml version="1.0"?><wiki_page><text>'"$header"'<pre>'"$crontab"'</pre></text></wiki_page>' http://tracker.ceph.com/projects/ceph-releases/wiki/sepia.xml?key=$(cat /etc/redmine-key) + +## This is an example only, don't remove ! +## to see result open http://tracker.ceph.com/projects/ceph-qa-suite/wiki/ceph-ansible +@daily SUITE_NAME=~/src/ceph-qa-suite_main/suites/ceph-ansible; crontab=$(teuthology-describe-tests --show-facet no $SUITE_NAME | perl -p -e 's/</&lt;/g; s/>/&gt;/g; s/&/&amp;/g') ; header=$(echo h4. 
$SUITE_NAME ; echo " "; echo " ") ; curl --verbose -X PUT --header 'Content-type: application/xml' --data-binary '<?xml version="1.0"?><wiki_page><text>'"$header"'<pre>'"$crontab"'</pre></text></wiki_page>' http://tracker.ceph.com/projects/ceph-qa-suite/wiki/ceph-ansible.xml?key=$(cat /etc/redmine-key) + + +## ********** smoke tests on main, octopus, and pacific branches +# 0 5 * * 0,2,4 CEPH_BRANCH=main; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s smoke -k distro -e $CEPH_QA_EMAIL -p 70 +# 0 8 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -m $MACHINE_NAME -s smoke -k distro -e $CEPH_QA_EMAIL -p 70 +# 7 8 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -m $MACHINE_NAME -s smoke -k distro -e $CEPH_QA_EMAIL -p 70 + + +## ********** windows tests on main branch - weekly +# 00 03 * * 1 CEPH_BRANCH=main; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s windows -k distro -e $CEPH_QA_EMAIL + +## ********** crimson tests on main branch - weekly +# 01 01 * * 0 CEPH_BRANCH=main; MACHINE_NAME=smithi; SUITE_NAME=crimson-rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL + +## quincy branch runs - weekly +## suites rados and rbd use --subset arg and must be call with schedule_subset.sh +## see script in https://github.com/ceph/ceph/tree/main/qa/machine_types + +# 01 07 * * 0 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 07 07 * * 0 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=orch; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 01 02 * * 1 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=rbd; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 03 * * 2 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=fs; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 32 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 11 * * 3 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=powercycle; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 05 03 * * 4 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=rgw; KERNEL=distro; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s $SUITE_NAME -k $KERNEL -e $CEPH_QA_EMAIL +# 20 03 * * 5 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=krbd; KERNEL=testing; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s $SUITE_NAME -k $KERNEL -e $CEPH_QA_EMAIL + +### The suite below must run on bare-metal because it's performance suite and run 3 times to produce more data points +# 57 03 * * 6 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s 
perf-basic -k distro -e $CEPH_QA_EMAIL -N 3 + + +########################## + +#********** nautilus branch START - weekly + +# 25 13 * * 5 CEPH_BRANCH=nautilus; MACHINE_NAME=smithi; SUITE_NAME=kcephfs; KERNEL=testing; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 2999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 05 * * 0 CEPH_BRANCH=nautilus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s krbd -k testing -e $CEPH_QA_EMAIL + + +#********** nautilus branch END + +#********** octopus branch START - weekly + +# 30 03 * * 3 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 00 06 * * 4 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=rbd; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 10 04 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=fs; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 13 * * 6 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=multimds; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 12 * * 0 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=powercycle; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 05 05 * * 1 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s rgw -k distro -e $CEPH_QA_EMAIL +# 15 05 * * 2 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s krbd -k testing -e $CEPH_QA_EMAIL + +## upgrades suites for on octopus +# 30 02 * * 4 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -k distro -m $MACHINE_NAME -s upgrade/mimic-x -e $CEPH_QA_EMAIL +# 23 14 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -k distro -n 100 -m $MACHINE_NAME -s upgrade/nautilus-x -e $CEPH_QA_EMAIL +# 25 01 * * 6 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/octopus-p2p -k distro -e $CEPH_QA_EMAIL + + +## !!!! 
three suites below MUST use --suite-branch luminous, mimic, nautilus (see https://tracker.ceph.com/issues/24021) +## The suites below run without filters + +# 47 01 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/client-upgrade-luminous-octopus -k distro -e $CEPH_QA_EMAIL --suite-branch luminous -t py2 +# 50 01 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/client-upgrade-mimic-octopus -k distro -e $CEPH_QA_EMAIL --suite-branch mimic -t py2 +# 50 01 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-nautilus-octopus -k distro -e $CEPH_QA_EMAIL --suite-branch nautilus + +#********** octopus branch END + + +#********** pacific branch START - frequency 4(2) times a week + +# 31 03 * * 0 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 99999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 07 06 * * 1 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=rbd; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 99999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 17 04 * * 2 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=fs; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 32 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 17 12 * * 3 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=powercycle; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 07 05 * * 4 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s rgw -k distro -e $CEPH_QA_EMAIL -p 500 +# 17 05 * * 5 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s krbd -k testing -e $CEPH_QA_EMAIL -p 500 +# 23 14 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -k distro -n 100 -m $MACHINE_NAME -s upgrade/nautilus-x -e $CEPH_QA_EMAIL -p 500 +# 20 01 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-octopus-pacific -k distro -e $CEPH_QA_EMAIL --suite-branch octopus -p 500 + +# 20 07 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-nautilus-pacific -k distro -e $CEPH_QA_EMAIL --suite-branch nautilus -p 500 + + +# 22 14 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=upgrade:octopus-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority + +# 25 01 * * 7 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 
100 -m $MACHINE_NAME -s upgrade/pacific-p2p -k distro -e $CEPH_QA_EMAIL + + +#********** pacific branch END + + +### upgrade runs for quincy release +###### on smithi + +## !!!! the client suites below MUST use --suite-branch octopus, pacific (see https://tracker.ceph.com/issues/24021) + +# 20 01 * * 4 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-octopus-quincy -k distro -e $CEPH_QA_EMAIL --suite-branch octopus + +# 25 01 * * 4 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-pacific-quincy -k distro -e $CEPH_QA_EMAIL --suite-branch pacific + +# 22 14 * * 5 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=upgrade:octopus-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority + +# 23 14 * * 5 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=upgrade:pacific-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority + + +# 35 01 * * 7 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/quincy-p2p -k distro -e $CEPH_QA_EMAIL + + +### upgrade runs for reef release +###### on smithi + + +# 23 14 * * 6 CEPH_BRANCH=main; MACHINE_NAME=smithi; SUITE_NAME=upgrade:pacific-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority + +# 23 14 * * 6 CEPH_BRANCH=main; MACHINE_NAME=smithi; SUITE_NAME=upgrade:quincy-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority diff --git a/qa/debug/buildpackages.yaml b/qa/debug/buildpackages.yaml new file mode 100644 index 000000000..527ed6627 --- /dev/null +++ b/qa/debug/buildpackages.yaml @@ -0,0 +1,6 @@ +tasks: + - buildpackages: + machine: + disk: 40 # GB + ram: 15000 # MB + cpus: 16 diff --git a/qa/debug/mds_client.yaml b/qa/debug/mds_client.yaml new file mode 100644 index 000000000..c6fec3fc6 --- /dev/null +++ b/qa/debug/mds_client.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + mds: + debug ms: 1 + debug mds: 20 + client: + debug ms: 1 + debug client: 20
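The qa/debug/mds_client.yaml fragment above raises MDS and client logging purely through conf overrides that the ceph task merges into the job's ceph.conf. For ad-hoc debugging outside a scheduled run, a roughly equivalent effect can be had at runtime through the config database; this is only a sketch that reuses the option names from the fragment, not part of the fragment itself:

    ceph config set mds debug_mds 20
    ceph config set mds debug_ms 1
    ceph config set client debug_client 20
    ceph config set client debug_ms 1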
\ No newline at end of file diff --git a/qa/debug/mgr.yaml b/qa/debug/mgr.yaml new file mode 100644 index 000000000..1f8e9cbc2 --- /dev/null +++ b/qa/debug/mgr.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + mon: + debug mon: 20 + mgr: + debug mgr: 20 + debug ms: 1 + debug client: 20 + client: + debug client: 20 + debug mgrc: 20 + debug ms: 1 + osd: + debug mgrc: 20 + mds: + debug mgrc: 20 diff --git a/qa/debug/openstack-15G.yaml b/qa/debug/openstack-15G.yaml new file mode 100644 index 000000000..857ad22a2 --- /dev/null +++ b/qa/debug/openstack-15G.yaml @@ -0,0 +1,3 @@ +openstack: + - machine: + ram: 15000 # MB diff --git a/qa/debug/openstack-30G.yaml b/qa/debug/openstack-30G.yaml new file mode 100644 index 000000000..da7ed803a --- /dev/null +++ b/qa/debug/openstack-30G.yaml @@ -0,0 +1,3 @@ +openstack: + - machine: + ram: 30000 # MB diff --git a/qa/distros/.qa b/qa/distros/.qa new file mode 120000 index 000000000..a96aa0ea9 --- /dev/null +++ b/qa/distros/.qa @@ -0,0 +1 @@ +..
\ No newline at end of file diff --git a/qa/distros/a-supported-distro.yaml b/qa/distros/a-supported-distro.yaml new file mode 120000 index 000000000..33a40b6e4 --- /dev/null +++ b/qa/distros/a-supported-distro.yaml @@ -0,0 +1 @@ +all/centos_7.2.yaml
\ No newline at end of file diff --git a/qa/distros/all/centos.yaml b/qa/distros/all/centos.yaml new file mode 100644 index 000000000..1efcfa192 --- /dev/null +++ b/qa/distros/all/centos.yaml @@ -0,0 +1,2 @@ +os_type: centos +ktype: distro diff --git a/qa/distros/all/centos_6.3.yaml b/qa/distros/all/centos_6.3.yaml new file mode 100644 index 000000000..ab441ebe4 --- /dev/null +++ b/qa/distros/all/centos_6.3.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "6.3" +ktype: distro diff --git a/qa/distros/all/centos_6.4.yaml b/qa/distros/all/centos_6.4.yaml new file mode 100644 index 000000000..c0675434f --- /dev/null +++ b/qa/distros/all/centos_6.4.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "6.4" +ktype: distro diff --git a/qa/distros/all/centos_6.5.yaml b/qa/distros/all/centos_6.5.yaml new file mode 100644 index 000000000..2500389ee --- /dev/null +++ b/qa/distros/all/centos_6.5.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "6.5" +ktype: distro diff --git a/qa/distros/all/centos_7.0.yaml b/qa/distros/all/centos_7.0.yaml new file mode 100644 index 000000000..357b11f0d --- /dev/null +++ b/qa/distros/all/centos_7.0.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.0" +ktype: distro diff --git a/qa/distros/all/centos_7.1.yaml b/qa/distros/all/centos_7.1.yaml new file mode 100644 index 000000000..022620d9e --- /dev/null +++ b/qa/distros/all/centos_7.1.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.1" +ktype: distro diff --git a/qa/distros/all/centos_7.2.yaml b/qa/distros/all/centos_7.2.yaml new file mode 100644 index 000000000..9a918f855 --- /dev/null +++ b/qa/distros/all/centos_7.2.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.2" +ktype: distro diff --git a/qa/distros/all/centos_7.3.yaml b/qa/distros/all/centos_7.3.yaml new file mode 100644 index 000000000..e86cbb80d --- /dev/null +++ b/qa/distros/all/centos_7.3.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.3" +ktype: distro diff --git a/qa/distros/all/centos_7.4.yaml b/qa/distros/all/centos_7.4.yaml new file mode 100644 index 000000000..3eb689611 --- /dev/null +++ b/qa/distros/all/centos_7.4.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.4" +ktype: distro diff --git a/qa/distros/all/centos_7.5.yaml b/qa/distros/all/centos_7.5.yaml new file mode 100644 index 000000000..2f067e462 --- /dev/null +++ b/qa/distros/all/centos_7.5.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.5" +ktype: distro diff --git a/qa/distros/all/centos_7.6.yaml b/qa/distros/all/centos_7.6.yaml new file mode 100644 index 000000000..81014e102 --- /dev/null +++ b/qa/distros/all/centos_7.6.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "7.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_7.yaml b/qa/distros/all/centos_7.yaml new file mode 120000 index 000000000..23ef40d86 --- /dev/null +++ b/qa/distros/all/centos_7.yaml @@ -0,0 +1 @@ +centos_7.6.yaml
\ No newline at end of file diff --git a/qa/distros/all/centos_8.0.yaml b/qa/distros/all/centos_8.0.yaml new file mode 100644 index 000000000..1679bf0d5 --- /dev/null +++ b/qa/distros/all/centos_8.0.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.0" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.1.yaml b/qa/distros/all/centos_8.1.yaml new file mode 100644 index 000000000..f764e5079 --- /dev/null +++ b/qa/distros/all/centos_8.1.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.1" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.2.yaml b/qa/distros/all/centos_8.2.yaml new file mode 100644 index 000000000..1ccbd8abd --- /dev/null +++ b/qa/distros/all/centos_8.2.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.2" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.3.yaml b/qa/distros/all/centos_8.3.yaml new file mode 100644 index 000000000..b9a7c2579 --- /dev/null +++ b/qa/distros/all/centos_8.3.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.3" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.stream.yaml b/qa/distros/all/centos_8.stream.yaml new file mode 100644 index 000000000..5ae75c6be --- /dev/null +++ b/qa/distros/all/centos_8.stream.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.yaml b/qa/distros/all/centos_8.yaml new file mode 120000 index 000000000..8e7476153 --- /dev/null +++ b/qa/distros/all/centos_8.yaml @@ -0,0 +1 @@ +centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/distros/all/centos_9.stream.yaml b/qa/distros/all/centos_9.stream.yaml new file mode 100644 index 000000000..52d553bd6 --- /dev/null +++ b/qa/distros/all/centos_9.stream.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "9.stream" diff --git a/qa/distros/all/centos_latest.yaml b/qa/distros/all/centos_latest.yaml new file mode 120000 index 000000000..2f843a512 --- /dev/null +++ b/qa/distros/all/centos_latest.yaml @@ -0,0 +1 @@ +centos_9.stream.yaml
\ No newline at end of file diff --git a/qa/distros/all/debian_6.0.yaml b/qa/distros/all/debian_6.0.yaml new file mode 100644 index 000000000..e0d6f51f8 --- /dev/null +++ b/qa/distros/all/debian_6.0.yaml @@ -0,0 +1,3 @@ +os_type: debian +os_version: "6.0" +ktype: distro diff --git a/qa/distros/all/debian_7.0.yaml b/qa/distros/all/debian_7.0.yaml new file mode 100644 index 000000000..1eba6366d --- /dev/null +++ b/qa/distros/all/debian_7.0.yaml @@ -0,0 +1,3 @@ +os_type: debian +os_version: "7.0" +ktype: distro diff --git a/qa/distros/all/debian_8.0.yaml b/qa/distros/all/debian_8.0.yaml new file mode 100644 index 000000000..48f9e44d4 --- /dev/null +++ b/qa/distros/all/debian_8.0.yaml @@ -0,0 +1,3 @@ +os_type: debian +os_version: "8.0" +ktype: distro diff --git a/qa/distros/all/fedora_17.yaml b/qa/distros/all/fedora_17.yaml new file mode 100644 index 000000000..4124a1c0a --- /dev/null +++ b/qa/distros/all/fedora_17.yaml @@ -0,0 +1,3 @@ +os_type: fedora +os_version: "17" +ktype: distro diff --git a/qa/distros/all/fedora_18.yaml b/qa/distros/all/fedora_18.yaml new file mode 100644 index 000000000..7c87ae53f --- /dev/null +++ b/qa/distros/all/fedora_18.yaml @@ -0,0 +1,3 @@ +os_type: fedora +os_version: "18" +ktype: distro diff --git a/qa/distros/all/fedora_19.yaml b/qa/distros/all/fedora_19.yaml new file mode 100644 index 000000000..5ee62d861 --- /dev/null +++ b/qa/distros/all/fedora_19.yaml @@ -0,0 +1,3 @@ +os_type: fedora +os_version: "19" +ktype: distro diff --git a/qa/distros/all/opensuse_15.1.yaml b/qa/distros/all/opensuse_15.1.yaml new file mode 100644 index 000000000..05cb3d8e4 --- /dev/null +++ b/qa/distros/all/opensuse_15.1.yaml @@ -0,0 +1,3 @@ +os_type: opensuse +os_version: "15.1" +ktype: distro diff --git a/qa/distros/all/opensuse_15.2.yaml b/qa/distros/all/opensuse_15.2.yaml new file mode 100644 index 000000000..18ee83ba8 --- /dev/null +++ b/qa/distros/all/opensuse_15.2.yaml @@ -0,0 +1,3 @@ +os_type: opensuse +os_version: "15.2" +ktype: distro diff --git a/qa/distros/all/opensuse_42.3.yaml b/qa/distros/all/opensuse_42.3.yaml new file mode 100644 index 000000000..d3419fc95 --- /dev/null +++ b/qa/distros/all/opensuse_42.3.yaml @@ -0,0 +1,3 @@ +os_type: opensuse +os_version: "42.3" +ktype: distro diff --git a/qa/distros/all/rhel_6.3.yaml b/qa/distros/all/rhel_6.3.yaml new file mode 100644 index 000000000..f9171354f --- /dev/null +++ b/qa/distros/all/rhel_6.3.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "6.3" +ktype: distro diff --git a/qa/distros/all/rhel_6.4.yaml b/qa/distros/all/rhel_6.4.yaml new file mode 100644 index 000000000..5b250a32f --- /dev/null +++ b/qa/distros/all/rhel_6.4.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "6.4" +ktype: distro diff --git a/qa/distros/all/rhel_6.5.yaml b/qa/distros/all/rhel_6.5.yaml new file mode 100644 index 000000000..6e9f7fe5c --- /dev/null +++ b/qa/distros/all/rhel_6.5.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "6.5" +ktype: distro diff --git a/qa/distros/all/rhel_6.yaml b/qa/distros/all/rhel_6.yaml new file mode 120000 index 000000000..850573949 --- /dev/null +++ b/qa/distros/all/rhel_6.yaml @@ -0,0 +1 @@ +rhel_6.5.yaml
\ No newline at end of file diff --git a/qa/distros/all/rhel_7.0.yaml b/qa/distros/all/rhel_7.0.yaml new file mode 100644 index 000000000..36b6d4282 --- /dev/null +++ b/qa/distros/all/rhel_7.0.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "7.0" +ktype: distro diff --git a/qa/distros/all/rhel_7.5.yaml b/qa/distros/all/rhel_7.5.yaml new file mode 100644 index 000000000..6b1e60f80 --- /dev/null +++ b/qa/distros/all/rhel_7.5.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "7.5" +ktype: distro diff --git a/qa/distros/all/rhel_7.6.yaml b/qa/distros/all/rhel_7.6.yaml new file mode 100644 index 000000000..37bc0fb4a --- /dev/null +++ b/qa/distros/all/rhel_7.6.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "7.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_7.7.yaml b/qa/distros/all/rhel_7.7.yaml new file mode 100644 index 000000000..ac44fe18e --- /dev/null +++ b/qa/distros/all/rhel_7.7.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "7.7" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_7.yaml b/qa/distros/all/rhel_7.yaml new file mode 120000 index 000000000..85ed1cd48 --- /dev/null +++ b/qa/distros/all/rhel_7.yaml @@ -0,0 +1 @@ +rhel_7.7.yaml
\ No newline at end of file diff --git a/qa/distros/all/rhel_8.0.yaml b/qa/distros/all/rhel_8.0.yaml new file mode 100644 index 000000000..da6a33ed0 --- /dev/null +++ b/qa/distros/all/rhel_8.0.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.0" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.1.yaml b/qa/distros/all/rhel_8.1.yaml new file mode 100644 index 000000000..c73893149 --- /dev/null +++ b/qa/distros/all/rhel_8.1.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.1" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.3.yaml b/qa/distros/all/rhel_8.3.yaml new file mode 100644 index 000000000..4e44bbff5 --- /dev/null +++ b/qa/distros/all/rhel_8.3.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.3" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.4.yaml b/qa/distros/all/rhel_8.4.yaml new file mode 100644 index 000000000..5a299ffc5 --- /dev/null +++ b/qa/distros/all/rhel_8.4.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.4" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.5.yaml b/qa/distros/all/rhel_8.5.yaml new file mode 100644 index 000000000..3e02bb196 --- /dev/null +++ b/qa/distros/all/rhel_8.5.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.5" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.6.yaml b/qa/distros/all/rhel_8.6.yaml new file mode 100644 index 000000000..1f9a6b73f --- /dev/null +++ b/qa/distros/all/rhel_8.6.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.yaml b/qa/distros/all/rhel_8.yaml new file mode 120000 index 000000000..d49c09cc1 --- /dev/null +++ b/qa/distros/all/rhel_8.yaml @@ -0,0 +1 @@ +rhel_8.6.yaml
\ No newline at end of file diff --git a/qa/distros/all/sle_12.2.yaml b/qa/distros/all/sle_12.2.yaml new file mode 100644 index 000000000..720fc2dec --- /dev/null +++ b/qa/distros/all/sle_12.2.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "12.2" +ktype: distro diff --git a/qa/distros/all/sle_12.3.yaml b/qa/distros/all/sle_12.3.yaml new file mode 100644 index 000000000..baab97ed7 --- /dev/null +++ b/qa/distros/all/sle_12.3.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "12.3" +ktype: distro diff --git a/qa/distros/all/sle_15.1.yaml b/qa/distros/all/sle_15.1.yaml new file mode 100644 index 000000000..e2fe5427f --- /dev/null +++ b/qa/distros/all/sle_15.1.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "15.1" +ktype: distro diff --git a/qa/distros/all/sle_15.2.yaml b/qa/distros/all/sle_15.2.yaml new file mode 100644 index 000000000..ccaac1799 --- /dev/null +++ b/qa/distros/all/sle_15.2.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "15.2" +ktype: distro diff --git a/qa/distros/all/ubuntu_12.04.yaml b/qa/distros/all/ubuntu_12.04.yaml new file mode 100644 index 000000000..c7d076def --- /dev/null +++ b/qa/distros/all/ubuntu_12.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "12.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_12.10.yaml b/qa/distros/all/ubuntu_12.10.yaml new file mode 100644 index 000000000..5b1c510a1 --- /dev/null +++ b/qa/distros/all/ubuntu_12.10.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "12.10" +ktype: distro diff --git a/qa/distros/all/ubuntu_14.04.yaml b/qa/distros/all/ubuntu_14.04.yaml new file mode 100644 index 000000000..3067dc0f7 --- /dev/null +++ b/qa/distros/all/ubuntu_14.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "14.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_14.04_aarch64.yaml b/qa/distros/all/ubuntu_14.04_aarch64.yaml new file mode 100644 index 000000000..08ad4f50f --- /dev/null +++ b/qa/distros/all/ubuntu_14.04_aarch64.yaml @@ -0,0 +1,4 @@ +os_type: ubuntu +os_version: "14.04" +arch: aarch64 +ktype: distro diff --git a/qa/distros/all/ubuntu_14.04_i686.yaml b/qa/distros/all/ubuntu_14.04_i686.yaml new file mode 100644 index 000000000..905391e1b --- /dev/null +++ b/qa/distros/all/ubuntu_14.04_i686.yaml @@ -0,0 +1,4 @@ +os_type: ubuntu +os_version: "14.04" +arch: i686 +ktype: distro diff --git a/qa/distros/all/ubuntu_16.04.yaml b/qa/distros/all/ubuntu_16.04.yaml new file mode 100644 index 000000000..7a55a7735 --- /dev/null +++ b/qa/distros/all/ubuntu_16.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "16.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_18.04.yaml b/qa/distros/all/ubuntu_18.04.yaml new file mode 100644 index 000000000..3a89b202d --- /dev/null +++ b/qa/distros/all/ubuntu_18.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "18.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_20.04.yaml b/qa/distros/all/ubuntu_20.04.yaml new file mode 100644 index 000000000..f9fb375ad --- /dev/null +++ b/qa/distros/all/ubuntu_20.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "20.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_22.04.yaml b/qa/distros/all/ubuntu_22.04.yaml new file mode 100644 index 000000000..a34ddad11 --- /dev/null +++ b/qa/distros/all/ubuntu_22.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "22.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_latest.yaml b/qa/distros/all/ubuntu_latest.yaml new file mode 120000 index 000000000..08ecf3afa --- /dev/null +++ b/qa/distros/all/ubuntu_latest.yaml @@ -0,0 +1 @@ +ubuntu_22.04.yaml
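Most of the convenience names under qa/distros/all are symlinks that pin a moving label to one concrete release, e.g. centos_8.yaml -> centos_8.stream.yaml, centos_latest.yaml -> centos_9.stream.yaml and ubuntu_latest.yaml -> ubuntu_22.04.yaml above. When it matters which release a suite will actually schedule, the target can be checked directly; a trivial check, assuming a checkout at ./ceph:

    readlink ceph/qa/distros/all/ubuntu_latest.yaml    # ubuntu_22.04.yaml
    readlink ceph/qa/distros/all/centos_latest.yaml    # centos_9.stream.yaml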
\ No newline at end of file diff --git a/qa/distros/container-hosts/.qa b/qa/distros/container-hosts/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/distros/container-hosts/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/distros/container-hosts/centos_8.stream_container_tools.yaml b/qa/distros/container-hosts/centos_8.stream_container_tools.yaml new file mode 100644 index 000000000..4a76306f1 --- /dev/null +++ b/qa/distros/container-hosts/centos_8.stream_container_tools.yaml @@ -0,0 +1,14 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml b/qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml new file mode 100644 index 000000000..b06e1c87d --- /dev/null +++ b/qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml @@ -0,0 +1,16 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf + - sudo sed -i 's/runtime = "runc"/#runtime = "runc"/g' /usr/share/containers/containers.conf + - sudo sed -i 's/#runtime = "crun"/runtime = "crun"/g' /usr/share/containers/containers.conf diff --git a/qa/distros/container-hosts/rhel_8.6_container_tools_3.0.yaml b/qa/distros/container-hosts/rhel_8.6_container_tools_3.0.yaml new file mode 100644 index 000000000..361d8546e --- /dev/null +++ b/qa/distros/container-hosts/rhel_8.6_container_tools_3.0.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:3.0 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/container-hosts/rhel_8.6_container_tools_rhel8.yaml b/qa/distros/container-hosts/rhel_8.6_container_tools_rhel8.yaml new file mode 100644 index 000000000..be94ed69e --- /dev/null +++ b/qa/distros/container-hosts/rhel_8.6_container_tools_rhel8.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:rhel8 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/container-hosts/ubuntu_20.04.yaml b/qa/distros/container-hosts/ubuntu_20.04.yaml new file mode 100644 index 000000000..bb9f5c00a --- /dev/null +++ b/qa/distros/container-hosts/ubuntu_20.04.yaml @@ -0,0 +1,9 @@ +os_type: ubuntu +os_version: "20.04" +# the normal ubuntu 20.04 kernel (5.4.0-88-generic currently) have a bug that prevents the nvme_loop +# from behaving. 
I think it is this: +# https://lkml.org/lkml/2020/9/21/1456 +# (at least, that is the symptom: nvme nvme1: Connect command failed, error wo/DNR bit: 880) +overrides: + kernel: + hwe: true diff --git a/qa/distros/crimson-supported-all-distro/centos_8.yaml b/qa/distros/crimson-supported-all-distro/centos_8.yaml new file mode 120000 index 000000000..b7e6c9b4e --- /dev/null +++ b/qa/distros/crimson-supported-all-distro/centos_8.yaml @@ -0,0 +1 @@ +../all/centos_8.yaml
\ No newline at end of file diff --git a/qa/distros/crimson-supported-all-distro/centos_latest.yaml b/qa/distros/crimson-supported-all-distro/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/crimson-supported-all-distro/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/podman/centos_8.stream_container_tools.yaml b/qa/distros/podman/centos_8.stream_container_tools.yaml new file mode 100644 index 000000000..4a76306f1 --- /dev/null +++ b/qa/distros/podman/centos_8.stream_container_tools.yaml @@ -0,0 +1,14 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/podman/rhel_8.6_container_tools_3.0.yaml b/qa/distros/podman/rhel_8.6_container_tools_3.0.yaml new file mode 100644 index 000000000..361d8546e --- /dev/null +++ b/qa/distros/podman/rhel_8.6_container_tools_3.0.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:3.0 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml b/qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml new file mode 100644 index 000000000..be94ed69e --- /dev/null +++ b/qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:rhel8 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/single-container-host.yaml b/qa/distros/single-container-host.yaml new file mode 120000 index 000000000..f71756d42 --- /dev/null +++ b/qa/distros/single-container-host.yaml @@ -0,0 +1 @@ +container-hosts/centos_8.stream_container_tools_crun.yaml
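qa/distros/single-container-host.yaml above resolves to the crun variant, whose pexec task comments out runtime = "runc" and enables runtime = "crun" in /usr/share/containers/containers.conf. After that task has run, the runtime podman actually uses can be confirmed on the host; a sketch, assuming a podman 3.x style info template:

    podman info --format '{{ .Host.OCIRuntime.Name }}'    # expected: crun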
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/centos_8.yaml b/qa/distros/supported-all-distro/centos_8.yaml new file mode 120000 index 000000000..b7e6c9b4e --- /dev/null +++ b/qa/distros/supported-all-distro/centos_8.yaml @@ -0,0 +1 @@ +../all/centos_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/centos_latest.yaml b/qa/distros/supported-all-distro/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/supported-all-distro/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/rhel_8.yaml b/qa/distros/supported-all-distro/rhel_8.yaml new file mode 120000 index 000000000..f803f091e --- /dev/null +++ b/qa/distros/supported-all-distro/rhel_8.yaml @@ -0,0 +1 @@ +../all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/ubuntu_20.04.yaml b/qa/distros/supported-all-distro/ubuntu_20.04.yaml new file mode 120000 index 000000000..75d907e3b --- /dev/null +++ b/qa/distros/supported-all-distro/ubuntu_20.04.yaml @@ -0,0 +1 @@ +../all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/ubuntu_latest.yaml b/qa/distros/supported-all-distro/ubuntu_latest.yaml new file mode 120000 index 000000000..cfcd0d1a8 --- /dev/null +++ b/qa/distros/supported-all-distro/ubuntu_latest.yaml @@ -0,0 +1 @@ +../all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/$ b/qa/distros/supported-random-distro$/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/distros/supported-random-distro$/$ diff --git a/qa/distros/supported-random-distro$/centos_8.yaml b/qa/distros/supported-random-distro$/centos_8.yaml new file mode 120000 index 000000000..b7e6c9b4e --- /dev/null +++ b/qa/distros/supported-random-distro$/centos_8.yaml @@ -0,0 +1 @@ +../all/centos_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/centos_latest.yaml b/qa/distros/supported-random-distro$/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/supported-random-distro$/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/rhel_8.yaml b/qa/distros/supported-random-distro$/rhel_8.yaml new file mode 120000 index 000000000..f803f091e --- /dev/null +++ b/qa/distros/supported-random-distro$/rhel_8.yaml @@ -0,0 +1 @@ +../all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/ubuntu_20.04.yaml b/qa/distros/supported-random-distro$/ubuntu_20.04.yaml new file mode 120000 index 000000000..75d907e3b --- /dev/null +++ b/qa/distros/supported-random-distro$/ubuntu_20.04.yaml @@ -0,0 +1 @@ +../all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/ubuntu_latest.yaml b/qa/distros/supported-random-distro$/ubuntu_latest.yaml new file mode 120000 index 000000000..cfcd0d1a8 --- /dev/null +++ b/qa/distros/supported-random-distro$/ubuntu_latest.yaml @@ -0,0 +1 @@ +../all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported/centos_8.stream.yaml b/qa/distros/supported/centos_8.stream.yaml new file mode 120000 index 000000000..e96091bb1 --- /dev/null +++ b/qa/distros/supported/centos_8.stream.yaml @@ -0,0 +1 @@ +../all/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/distros/supported/centos_latest.yaml b/qa/distros/supported/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/supported/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported/rhel_latest.yaml b/qa/distros/supported/rhel_latest.yaml new file mode 120000 index 000000000..f803f091e --- /dev/null +++ b/qa/distros/supported/rhel_latest.yaml @@ -0,0 +1 @@ +../all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported/ubuntu_20.04.yaml b/qa/distros/supported/ubuntu_20.04.yaml new file mode 120000 index 000000000..75d907e3b --- /dev/null +++ b/qa/distros/supported/ubuntu_20.04.yaml @@ -0,0 +1 @@ +../all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/distros/supported/ubuntu_latest.yaml b/qa/distros/supported/ubuntu_latest.yaml new file mode 120000 index 000000000..cfcd0d1a8 --- /dev/null +++ b/qa/distros/supported/ubuntu_latest.yaml @@ -0,0 +1 @@ +../all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/erasure-code/ec-feature-plugins-v2.yaml b/qa/erasure-code/ec-feature-plugins-v2.yaml new file mode 100644 index 000000000..f2d374dd9 --- /dev/null +++ b/qa/erasure-code/ec-feature-plugins-v2.yaml @@ -0,0 +1,98 @@ +# +# Test the expected behavior of the +# +# CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 +# +# feature. +# +roles: +- - mon.a + - mon.b + - osd.0 + - osd.1 +- - osd.2 + - mon.c + - mgr.x +tasks: +# +# Install firefly +# +- install: + branch: firefly +- ceph: + fs: xfs +# +# We don't need mon.c for now: it will be used later to make sure an old +# mon cannot join the quorum once the feature has been activated +# +- ceph.stop: + daemons: [mon.c] +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set WRONG plugin=WRONG + ceph osd pool create poolWRONG 12 12 erasure WRONG 2>&1 | grep "failed to load plugin using profile WRONG" +# +# Partial upgrade, osd.2 is not upgraded +# +- install.upgrade: + osd.0: +# +# a is the leader +# +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: the monitor cluster" +- ceph.restart: + daemons: [mon.b, osd.1, osd.0] + wait-for-healthy: false + wait-for-osds-up: true +# +# The lrc plugin cannot be used because osd.2 is not upgraded yet +# and would crash. +# +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: osd.2" +# +# Taking osd.2 out, the rest of the cluster is upgraded +# +- ceph.stop: + daemons: [osd.2] +- sleep: + duration: 60 +# +# Creating an erasure code profile using the lrc plugin now works +# +- exec: + mon.a: + - "ceph osd erasure-code-profile set profile-lrc plugin=lrc" +# +# osd.2 won't be able to join the cluster because it does not support the feature +# +- ceph.restart: + daemons: [osd.2] + wait-for-healthy: false +- sleep: + duration: 60 +- exec: + osd.2: + - |- + grep "protocol feature.*missing 100000000000" /var/log/ceph/ceph-osd.2.log +# +# mon.c won't be able to join the quorum because it does not support the feature +# +- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false +- sleep: + duration: 60 +- exec: + mon.c: + - |- + grep "missing.*feature" /var/log/ceph/ceph-mon.c.log diff --git a/qa/erasure-code/ec-rados-default.yaml b/qa/erasure-code/ec-rados-default.yaml new file mode 100644 index 000000000..cc62371e3 --- /dev/null +++ b/qa/erasure-code/ec-rados-default.yaml @@ -0,0 +1,19 @@ +tasks: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec task" diff --git a/qa/erasure-code/ec-rados-parallel.yaml b/qa/erasure-code/ec-rados-parallel.yaml new file mode 100644 index 000000000..0f01d8424 --- /dev/null +++ b/qa/erasure-code/ec-rados-parallel.yaml @@ -0,0 +1,20 @@ +workload: + parallel: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec parallel" diff --git a/qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml b/qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml new file mode 100644 index 000000000..2efb85436 --- /dev/null +++ 
b/qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: clay42profile + plugin: clay + k: 4 + m: 2 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml b/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml new file mode 100644 index 000000000..64b59705c --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml @@ -0,0 +1,26 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + min_size: 2 + write_append_excl: false + erasure_code_profile: + name: isaprofile + plugin: isa + k: 2 + m: 1 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 100644 index 000000000..d61b1c8af --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: jerasure21profile + plugin: jerasure + k: 2 + m: 1 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 100644 index 000000000..2ca53a799 --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1,31 @@ +# +# k=3 implies a stripe_width of 1376*3 = 4128 which is different from +# the default value of 4096. It is also not a multiple of 1024*1024 and +# creates situations where rounding rules during recovery become +# necessary. 
+# +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: jerasure31profile + plugin: jerasure + k: 3 + m: 1 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml new file mode 100644 index 000000000..dfcc61607 --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: jerasure21profile + plugin: jerasure + k: 4 + m: 2 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml b/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml new file mode 100644 index 000000000..86ae0568c --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 400 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: lrcprofile + plugin: lrc + k: 4 + m: 2 + l: 3 + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml b/qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml new file mode 100644 index 000000000..ee74c6e98 --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 400 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: shecprofile + plugin: shec + k: 4 + m: 3 + c: 2 + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-sequential.yaml b/qa/erasure-code/ec-rados-sequential.yaml new file mode 100644 index 000000000..90536ee6f --- /dev/null +++ b/qa/erasure-code/ec-rados-sequential.yaml @@ -0,0 +1,20 @@ +workload: + sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec sequential" diff --git a/qa/find-used-ports.sh b/qa/find-used-ports.sh new file mode 100755 index 000000000..c57525cd4 --- /dev/null +++ b/qa/find-used-ports.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +git --no-pager grep -n '127.0.0.1:[0-9]\+' | sed -n 's/.*127.0.0.1:\([0-9]\+\).*/\1/p' | sort -n | uniq -u diff --git a/qa/libceph/Makefile b/qa/libceph/Makefile new file mode 100644 index 000000000..06e1b990b --- /dev/null +++ b/qa/libceph/Makefile @@ -0,0 +1,11 @@ +CFLAGS = -Wall -Wextra -D_GNU_SOURCE -lcephfs -L../../build/lib + +TARGETS = trivial_libceph + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git 
a/qa/libceph/trivial_libceph.c b/qa/libceph/trivial_libceph.c new file mode 100644 index 000000000..9093e97e7 --- /dev/null +++ b/qa/libceph/trivial_libceph.c @@ -0,0 +1,69 @@ +#define _FILE_OFFSET_BITS 64 +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/statvfs.h> +#include "../../src/include/cephfs/libcephfs.h" + +#define MB64 (1<<26) + +int main(int argc, const char **argv) +{ + struct ceph_mount_info *cmount; + int ret, fd, len; + char buf[1024]; + + if (argc < 3) { + fprintf(stderr, "usage: ./%s <conf> <file>\n", argv[0]); + exit(1); + } + + ret = ceph_create(&cmount, NULL); + if (ret) { + fprintf(stderr, "ceph_create=%d\n", ret); + exit(1); + } + + ret = ceph_conf_read_file(cmount, argv[1]); + if (ret) { + fprintf(stderr, "ceph_conf_read_file=%d\n", ret); + exit(1); + } + + ret = ceph_conf_parse_argv(cmount, argc, argv); + if (ret) { + fprintf(stderr, "ceph_conf_parse_argv=%d\n", ret); + exit(1); + } + + ret = ceph_mount(cmount, NULL); + if (ret) { + fprintf(stderr, "ceph_mount=%d\n", ret); + exit(1); + } + + ret = ceph_chdir(cmount, "/"); + if (ret) { + fprintf(stderr, "ceph_chdir=%d\n", ret); + exit(1); + } + + fd = ceph_open(cmount, argv[2], O_CREAT|O_TRUNC|O_RDWR, 0777); + if (fd < 0) { + fprintf(stderr, "ceph_open=%d\n", fd); + exit(1); + } + + memset(buf, 'a', sizeof(buf)); + + len = ceph_write(cmount, fd, buf, sizeof(buf), 0); + + fprintf(stdout, "wrote %d bytes\n", len); + + ceph_shutdown(cmount); + + return 0; +} diff --git a/qa/loopall.sh b/qa/loopall.sh new file mode 100755 index 000000000..d69e8c720 --- /dev/null +++ b/qa/loopall.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -ex + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +testdir="$1" +[ -n "$2" ] && logdir=$2 || logdir=$1 + +[ ${basedir:0:1} == "." ] && basedir=`pwd`/${basedir:1} + +PATH="$basedir/src:$PATH" + +[ -z "$testdir" ] || [ ! -d "$testdir" ] && echo "specify test dir" && exit 1 +cd $testdir + +while true +do + for test in `cd $basedir/workunits && find . -executable -type f | $basedir/../src/script/permute` + do + echo "------ running test $test ------" + pwd + [ -d $test ] && rm -r $test + mkdir -p $test + mkdir -p `dirname $logdir/$test.log` + test -e $logdir/$test.log && rm $logdir/$test.log + sh -c "cd $test && $basedir/workunits/$test" 2>&1 | tee $logdir/$test.log + done +done diff --git a/qa/machine_types/schedule_rados_ovh.sh b/qa/machine_types/schedule_rados_ovh.sh new file mode 100755 index 000000000..aeb37162e --- /dev/null +++ b/qa/machine_types/schedule_rados_ovh.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# $1 - part +# $2 - branch name +# $3 - machine name +# $4 - email address +# $5 - filter out (this arg is to be at the end of the command line for now) + +## example #1 +## (date +%U) week number +## % 2 - mod 2 (e.g. 0,1,0,1 ...) +## * 7 - multiplied by 7 (e.g. 0,7,0,7...) +## $1 day of the week (0-6) +## /14 for 2 weeks + +## example #2 +## (date +%U) week number +## % 4 - mod 4 (e.g. 0,1,2,3,0,1,2,3 ...) +## * 7 - multiplied by 7 (e.g. 0,7,14,21,0,7,14,21...) 
+## $1 day of the week (0-6) +## /28 for 4 weeks + +echo "Scheduling " $2 " branch" +if [ $2 = "master" ] ; then + # run master branch with --newest option looking for good sha1 7 builds back + teuthology-suite -v -c $2 -m $3 -k distro -s rados --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 --newest 7 -e $4 ~/vps.yaml $5 +elif [ $2 = "jewel" ] ; then + # run jewel branch with /40 jobs + teuthology-suite -v -c $2 -m $3 -k distro -s rados --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $4 ~/vps.yaml $5 +else + # run NON master branches without --newest + teuthology-suite -v -c $2 -m $3 -k distro -s rados --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 -e $4 ~/vps.yaml $5 +fi + diff --git a/qa/machine_types/schedule_subset.sh b/qa/machine_types/schedule_subset.sh new file mode 100755 index 000000000..7f18c81ef --- /dev/null +++ b/qa/machine_types/schedule_subset.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e + +#command line => CEPH_BRANCH=<branch>; MACHINE_NAME=<machine_type>; SUITE_NAME=<suite>; ../schedule_subset.sh <day_of_week> $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL <$FILTER> + +partitions="$1" +shift +branch="$1" +shift +machine="$1" +shift +suite="$1" +shift +email="$1" +shift +kernel="$1" +shift +# rest of arguments passed directly to teuthology-suite + +echo "Scheduling $branch branch" +teuthology-suite -v -c "$branch" -m "$machine" -k "$kernel" -s "$suite" --ceph-repo https://git.ceph.com/ceph.git --suite-repo https://git.ceph.com/ceph.git --subset "$((RANDOM % partitions))/$partitions" --newest 100 -e "$email" "$@" diff --git a/qa/machine_types/vps.yaml b/qa/machine_types/vps.yaml new file mode 100644 index 000000000..64a3da47d --- /dev/null +++ b/qa/machine_types/vps.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + osd heartbeat grace: 100 + # this line to address issue #1017 + mon lease: 15 + mon lease ack timeout: 25 + s3tests: + idle_timeout: 1200 + ceph-fuse: + client.0: + mount_wait: 60 + mount_timeout: 120 diff --git a/qa/mds/test_anchortable.sh b/qa/mds/test_anchortable.sh new file mode 100755 index 000000000..1bf2494de --- /dev/null +++ b/qa/mds/test_anchortable.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -x + +mkdir links +for f in `seq 1 8` +do + mkdir $f + for g in `seq 1 20` + do + touch $f/$g + ln $f/$g links/$f.$g + done +done + +for f in `seq 1 8` +do + echo testing failure point $f + bash -c "pushd . ; cd $bindir ; sleep 10; ./ceph -c $conf mds tell \* injectargs \"--mds_kill_mdstable_at $f\" ; popd" & + bash -c "pushd . ; cd $bindir ; sleep 11 ; ./init-ceph -c $conf start mds ; popd" & + for g in `seq 1 20` + do + rm $f/$g + rm links/$f.$g + sleep 1 + done +done + diff --git a/qa/mds/test_mdstable_failures.sh b/qa/mds/test_mdstable_failures.sh new file mode 100755 index 000000000..c959995cf --- /dev/null +++ b/qa/mds/test_mdstable_failures.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -x + +for f in `seq 1 8` +do + echo testing failure point $f + pushd . ; cd $bindir ; ./ceph -c $conf mds tell \* injectargs "--mds_kill_mdstable_at $f" ; popd + sleep 1 # wait for mds command to go thru + bash -c "pushd . 
; cd $bindir ; sleep 10 ; ./init-ceph -c $conf start mds ; popd" & + touch $f + ln $f $f.link + sleep 10 +done + diff --git a/qa/mgr_ttl_cache/disable.yaml b/qa/mgr_ttl_cache/disable.yaml new file mode 100644 index 000000000..bbd78d53f --- /dev/null +++ b/qa/mgr_ttl_cache/disable.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr ttl cache expire seconds: 0 diff --git a/qa/mgr_ttl_cache/enable.yaml b/qa/mgr_ttl_cache/enable.yaml new file mode 100644 index 000000000..2c1c0e053 --- /dev/null +++ b/qa/mgr_ttl_cache/enable.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr ttl cache expire seconds: 5 diff --git a/qa/mon/bootstrap/host.sh b/qa/mon/bootstrap/host.sh new file mode 100755 index 000000000..ad4e327d1 --- /dev/null +++ b/qa/mon/bootstrap/host.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[global] +mon host = 127.0.0.1:6789 + +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring health + +killall ceph-mon +echo OK
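The qa/mgr_ttl_cache fragments above switch the manager's TTL cache by setting its expiry: 0 disables it, 5 keeps cached results for five seconds. For quick experiments the same knob can be flipped on a live cluster; a sketch using the underscore form of the option name from those fragments:

    ceph config set mgr mgr_ttl_cache_expire_seconds 5    # set back to 0 to disable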
\ No newline at end of file diff --git a/qa/mon/bootstrap/initial_members.sh b/qa/mon/bootstrap/initial_members.sh new file mode 100755 index 000000000..2dfa9e992 --- /dev/null +++ b/qa/mon/bootstrap/initial_members.sh @@ -0,0 +1,39 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +mon initial members = a,b,d +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +ceph -c conf -k keyring --monmap mm health + +ceph -c conf -k keyring --monmap mm health +if ceph -c conf -k keyring --monmap mm mon stat | grep a= | grep b= | grep c= ; then + break +fi + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/initial_members_asok.sh b/qa/mon/bootstrap/initial_members_asok.sh new file mode 100755 index 000000000..618f4c5db --- /dev/null +++ b/qa/mon/bootstrap/initial_members_asok.sh @@ -0,0 +1,66 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +debug asok = 20 +mon initial members = a,b,d +admin socket = $cwd/\$name.asok +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. 
+ +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --fsid $fsid --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --fsid $fsid --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a --public-addr 127.0.0.1:6789 +ceph-mon -c conf -i b --mon-data $cwd/mon.c --public-addr 127.0.0.1:6790 +ceph-mon -c conf -i c --mon-data $cwd/mon.b --public-addr 127.0.0.1:6791 + +sleep 1 + +if timeout 5 ceph -c conf -k keyring -m localhost mon stat | grep "a,b,c" ; then + echo WTF + exit 1 +fi + +ceph --admin-daemon mon.a.asok add_bootstrap_peer_hint 127.0.0.1:6790 + +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b'; then + break + fi + sleep 1 +done + +ceph --admin-daemon mon.c.asok add_bootstrap_peer_hint 127.0.0.1:6790 + +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b,c'; then + break + fi + sleep 1 +done + +ceph-mon -c conf -i d --mkfs --fsid $fsid --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d --public-addr 127.0.0.1:6792 +ceph --admin-daemon mon.d.asok add_bootstrap_peer_hint 127.0.0.1:6790 + +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b,c,d'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple.sh b/qa/mon/bootstrap/simple.sh new file mode 100755 index 000000000..2121301b9 --- /dev/null +++ b/qa/mon/bootstrap/simple.sh @@ -0,0 +1,36 @@ +#!/bin/sh -e + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple_expand.sh b/qa/mon/bootstrap/simple_expand.sh new file mode 100755 index 000000000..519d8ae8f --- /dev/null +++ b/qa/mon/bootstrap/simple_expand.sh @@ -0,0 +1,60 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. 
+ +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +ceph -c conf -k keyring --monmap mm health + +## expand via a kludged monmap +monmaptool mm --add d 127.0.0.1:6792 +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2,3'; then + break + fi + sleep 1 +done + +# again +monmaptool mm --add e 127.0.0.1:6793 +ceph-mon -c conf -i e --mkfs --monmap mm --mon-data $cwd/mon.e -k keyring +ceph-mon -c conf -i e --mon-data $cwd/mon.e + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2,3,4'; then + break + fi + sleep 1 +done + + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple_expand_monmap.sh b/qa/mon/bootstrap/simple_expand_monmap.sh new file mode 100755 index 000000000..da24c02c2 --- /dev/null +++ b/qa/mon/bootstrap/simple_expand_monmap.sh @@ -0,0 +1,44 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +ceph -c conf -k keyring --monmap mm health + +## expand via a kludged monmap +monmaptool mm --add d 127.0.0.1:6792 +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep d=; then + break + fi + sleep 1 +done + +killall ceph-mon + +echo OK diff --git a/qa/mon/bootstrap/simple_single_expand.sh b/qa/mon/bootstrap/simple_single_expand.sh new file mode 100755 index 000000000..99fe5645e --- /dev/null +++ b/qa/mon/bootstrap/simple_single_expand.sh @@ -0,0 +1,54 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. 
+ +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring --monmap mm health + +## expand via a kludged monmap +monmaptool mm --add d 127.0.0.1:6702 +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1'; then + break + fi + sleep 1 +done + +# again +monmaptool mm --add e 127.0.0.1:6793 +ceph-mon -c conf -i e --mkfs --monmap mm --mon-data $cwd/mon.e -k keyring +ceph-mon -c conf -i e --mon-data $cwd/mon.e + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2'; then + break + fi + sleep 1 +done + + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple_single_expand2.sh b/qa/mon/bootstrap/simple_single_expand2.sh new file mode 100755 index 000000000..28d0c563b --- /dev/null +++ b/qa/mon/bootstrap/simple_single_expand2.sh @@ -0,0 +1,40 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +ip=`host \`hostname\` | awk '{print $4}'` +monmaptool --create mm \ + --add a $ip:6779 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring --monmap mm health + +## expand via a local_network +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d --public-network 127.0.0.1/32 + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/single_host.sh b/qa/mon/bootstrap/single_host.sh new file mode 100755 index 000000000..c40b5614f --- /dev/null +++ b/qa/mon/bootstrap/single_host.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[global] +mon host = 127.0.0.1:6789 + +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data $cwd/mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring health + +killall ceph-mon +echo OK
\ No newline at end of file diff --git a/qa/mon/bootstrap/single_host_multi.sh b/qa/mon/bootstrap/single_host_multi.sh new file mode 100755 index 000000000..864f3b179 --- /dev/null +++ b/qa/mon/bootstrap/single_host_multi.sh @@ -0,0 +1,39 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[global] + +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +mon host = 127.0.0.1:6789 127.0.0.1:6790 127.0.0.1:6791 +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data $cwd/mon.a -k keyring --public-addr 127.0.0.1:6789 +ceph-mon -c conf -i b --mkfs --fsid $fsid --mon-data $cwd/mon.b -k keyring --public-addr 127.0.0.1:6790 +ceph-mon -c conf -i c --mkfs --fsid $fsid --mon-data $cwd/mon.c -k keyring --public-addr 127.0.0.1:6791 + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i b --mon-data $cwd/mon.b +ceph-mon -c conf -i c --mon-data $cwd/mon.c + +ceph -c conf -k keyring health -m 127.0.0.1 +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b,c'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK
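The expand scripts above all repeat the same poll-until-quorum loop. A small helper in the same style, shown here only as a sketch (it is not part of the tree), taking the expected quorum string as its argument:

#!/bin/sh -ex
# hypothetical wait_quorum.sh: poll `mon stat` until the expected quorum appears
# usage: ./wait_quorum.sh 'quorum 0,1,2'
expected=$1
while true; do
    ceph -c conf -k keyring --monmap mm health
    if ceph -c conf -k keyring --monmap mm mon stat | grep "$expected"; then
        break
    fi
    sleep 1
done

A timeout guard (giving up after N iterations) would be an obvious addition for unattended runs.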
\ No newline at end of file diff --git a/qa/mon_election/classic.yaml b/qa/mon_election/classic.yaml new file mode 100644 index 000000000..7ccd99830 --- /dev/null +++ b/qa/mon_election/classic.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 1
\ No newline at end of file diff --git a/qa/mon_election/connectivity.yaml b/qa/mon_election/connectivity.yaml new file mode 100644 index 000000000..3b9f9e26c --- /dev/null +++ b/qa/mon_election/connectivity.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 3
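These two fragments differ only in the strategy number (1 selects the classic election algorithm, 3 the connectivity-based one). On a running cluster the effect can be confirmed, and flipped, roughly as below; this assumes an Octopus-or-later ceph CLI and the command names are quoted from memory:

# show the strategy currently recorded in the monmap
ceph mon dump | grep election_strategy
# switch to the connectivity-based election (same effect as connectivity.yaml)
ceph mon set election_strategy connectivity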
\ No newline at end of file diff --git a/qa/msgr/async-v1only.yaml b/qa/msgr/async-v1only.yaml new file mode 100644 index 000000000..aa90cad67 --- /dev/null +++ b/qa/msgr/async-v1only.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + mon_bind_msgr2: false + conf: + global: + ms type: async + ms bind msgr2: false diff --git a/qa/msgr/async-v2only.yaml b/qa/msgr/async-v2only.yaml new file mode 100644 index 000000000..4de9f32ac --- /dev/null +++ b/qa/msgr/async-v2only.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms type: async + ms bind msgr2: true + ms bind msgr1: false diff --git a/qa/msgr/async.yaml b/qa/msgr/async.yaml new file mode 100644 index 000000000..9f93cab4f --- /dev/null +++ b/qa/msgr/async.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms type: async diff --git a/qa/mypy.ini b/qa/mypy.ini new file mode 100644 index 000000000..1215375ed --- /dev/null +++ b/qa/mypy.ini @@ -0,0 +1,2 @@ +[mypy] +ignore_missing_imports = True
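The two-line mypy.ini above only silences missing-stub errors. With mypy installed, a hand-run against the qa python sources might look like the following; the tasks/ target is an assumption, point it at whichever modules you care about:

# run mypy by hand with the qa tree's config
cd qa
mypy --config-file mypy.ini tasks/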
\ No newline at end of file diff --git a/qa/nightlies/cron_wrapper b/qa/nightlies/cron_wrapper new file mode 100755 index 000000000..29b4483e9 --- /dev/null +++ b/qa/nightlies/cron_wrapper @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# /nightlies/cron_wrapper.sh + +# check for no argument case and stop +if [ -z $1 ]; then + echo "need argument" + exit 1 +fi + +# set permanent $LOG file var +LOG="/var/log/crontab-nightlies-log/crontab.log" +# set $LOG_LOCKED_ERR in case locking failed +LOG_LOCK_ERR="/var/log/crontab-nightlies-log/crontab_lock_problem.$$" + +# temp files to store stdout and stderr +# named with the PID of this script in their name so they'll be unique +STDERR="/var/tmp/stderr.$$" +STDOUT="/var/tmp/stdout.$$" + +# $STDOUT and $STDERR are removed when the script exits for any reason +trap "rm -f $STDOUT $STDERR" 0 + +# run a command from this script's argument +# redirect stdout to $STDOUT file and redirect stderr to $STDERR file + +DATE=$(date) +echo -n "$DATE: " >> $STDOUT +echo "Running command: $@" >> $STDOUT +"$@" > $STDOUT 2> $STDERR + +# get return code from the command run +code=$? + +if [ $code != 0 ] ; then + # echoing to stdout/stderr makes cron send email + echo "stdout:" + cat $STDOUT + echo "stderr:" + cat $STDERR +else + # normal exit: just log stdout + + # lock $LOG with file descriptor 200 + exec 200>>$LOG + # if $LOG is locked by other process - wait for 20 sec + flock -w 20 200 || LOG=$LOG_LOCK_ERR + echo "stdout:" >> $LOG + cat $STDOUT >> $LOG + echo "stderr:" >> $LOG + cat $STDERR >> $LOG + # unlock + flock -u 200 +fi diff --git a/qa/objectstore/bluestore-bitmap.yaml b/qa/objectstore/bluestore-bitmap.yaml new file mode 100644 index 000000000..d50c50e76 --- /dev/null +++ b/qa/objectstore/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore/bluestore-comp-lz4.yaml b/qa/objectstore/bluestore-comp-lz4.yaml new file mode 100644 index 000000000..c45951984 --- /dev/null +++ b/qa/objectstore/bluestore-comp-lz4.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: lz4 + # lower the full ratios since we can 
fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-comp-snappy.yaml b/qa/objectstore/bluestore-comp-snappy.yaml new file mode 100644 index 000000000..e9cdd741d --- /dev/null +++ b/qa/objectstore/bluestore-comp-snappy.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: snappy + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-comp-zlib.yaml b/qa/objectstore/bluestore-comp-zlib.yaml new file mode 100644 index 000000000..b3628dea2 --- /dev/null +++ b/qa/objectstore/bluestore-comp-zlib.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zlib + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-comp-zstd.yaml b/qa/objectstore/bluestore-comp-zstd.yaml new file mode 100644 index 000000000..7906c6045 --- /dev/null +++ b/qa/objectstore/bluestore-comp-zstd.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zstd + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-hybrid.yaml b/qa/objectstore/bluestore-hybrid.yaml new file mode 100644 index 000000000..26e3302ce --- /dev/null +++ b/qa/objectstore/bluestore-hybrid.yaml @@ -0,0 +1,40 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + bluestore allocator: hybrid + bluefs allocator: hybrid + # 
lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/objectstore/bluestore-low-osd-mem-target.yaml b/qa/objectstore/bluestore-low-osd-mem-target.yaml new file mode 100644 index 000000000..e0efcbc36 --- /dev/null +++ b/qa/objectstore/bluestore-low-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 1610612736 # reduced to 1.5_G + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore/bluestore-stupid.yaml b/qa/objectstore/bluestore-stupid.yaml new file mode 100644 index 000000000..5441e9397 --- /dev/null +++ b/qa/objectstore/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore_cephfs/bluestore-bitmap.yaml b/qa/objectstore_cephfs/bluestore-bitmap.yaml new file mode 120000 index 000000000..951e65ac0 --- /dev/null +++ b/qa/objectstore_cephfs/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../objectstore/bluestore-bitmap.yaml
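All of these objectstore fragments steer OSD behaviour purely through conf overrides (allocator, compression, discard, full ratios). When debugging a job it can help to confirm what the OSD actually picked up; something along these lines, assuming osd.0 exists and the usual admin keyring is available on the node:

# via the mon/mgr
ceph config show osd.0 bluestore_allocator
ceph config show osd.0 bluestore_compression_algorithm
# or directly over the admin socket on the OSD host
ceph daemon osd.0 config get bluestore_allocator
ceph daemon osd.0 config get osd_failsafe_full_ratio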
\ No newline at end of file diff --git a/qa/objectstore_debug/bluestore-bitmap.yaml b/qa/objectstore_debug/bluestore-bitmap.yaml new file mode 100644 index 000000000..b18e04bee --- /dev/null +++ b/qa/objectstore_debug/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore_debug/bluestore-comp-lz4.yaml b/qa/objectstore_debug/bluestore-comp-lz4.yaml new file mode 100644 index 000000000..46f993e68 --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-lz4.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: lz4 + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-comp-snappy.yaml b/qa/objectstore_debug/bluestore-comp-snappy.yaml new file mode 100644 index 000000000..b5d58414e --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-snappy.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: snappy + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-comp-zlib.yaml b/qa/objectstore_debug/bluestore-comp-zlib.yaml new file mode 100644 index 000000000..b47ebbb7c --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-zlib.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 
+ ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zlib + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-comp-zstd.yaml b/qa/objectstore_debug/bluestore-comp-zstd.yaml new file mode 100644 index 000000000..e2f5e4e5b --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-zstd.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zstd + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-hybrid.yaml b/qa/objectstore_debug/bluestore-hybrid.yaml new file mode 100644 index 000000000..68b9bc427 --- /dev/null +++ b/qa/objectstore_debug/bluestore-hybrid.yaml @@ -0,0 +1,40 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: hybrid + bluefs allocator: hybrid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/objectstore_debug/bluestore-low-osd-mem-target.yaml b/qa/objectstore_debug/bluestore-low-osd-mem-target.yaml new file mode 100644 index 000000000..b2a49790b --- /dev/null +++ b/qa/objectstore_debug/bluestore-low-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 1610612736 # reduced to 1.5_G + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd 
failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore_debug/bluestore-stupid.yaml b/qa/objectstore_debug/bluestore-stupid.yaml new file mode 100644 index 000000000..ca811f131 --- /dev/null +++ b/qa/objectstore_debug/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/overrides/2-size-1-min-size.yaml b/qa/overrides/2-size-1-min-size.yaml new file mode 100644 index 000000000..d710aee22 --- /dev/null +++ b/qa/overrides/2-size-1-min-size.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 2 + osd_pool_default_min_size: 1 diff --git a/qa/overrides/2-size-2-min-size.yaml b/qa/overrides/2-size-2-min-size.yaml new file mode 100644 index 000000000..d46db3fad --- /dev/null +++ b/qa/overrides/2-size-2-min-size.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 2 + osd_pool_default_min_size: 2 + log-ignorelist: + - \(REQUEST_STUCK\) diff --git a/qa/overrides/3-size-2-min-size.yaml b/qa/overrides/3-size-2-min-size.yaml new file mode 100644 index 000000000..02579060c --- /dev/null +++ b/qa/overrides/3-size-2-min-size.yaml @@ -0,0 +1,8 @@ +overrides: + thrashosds: + min_in: 4 + ceph: + conf: + global: + osd_pool_default_size: 3 + osd_pool_default_min_size: 2 diff --git a/qa/overrides/ignorelist_wrongly_marked_down.yaml b/qa/overrides/ignorelist_wrongly_marked_down.yaml new file mode 100644 index 000000000..2ce7ffd97 --- /dev/null +++ b/qa/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + conf: + mds: + debug mds: 20 + debug ms: 1 + client: + debug client: 10
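The 2-size-1-min-size and 3-size-2-min-size fragments above only change pool defaults; a pool that already exists keeps its own settings. A quick interactive equivalent, with 'testpool' standing in for whatever pool the job created (the name is a placeholder):

ceph osd pool set testpool size 2
ceph osd pool set testpool min_size 1
# confirm what the pool ended up with
ceph osd pool get testpool size
ceph osd pool get testpool min_size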
\ No newline at end of file diff --git a/qa/overrides/more-active-recovery.yaml b/qa/overrides/more-active-recovery.yaml new file mode 100644 index 000000000..bfe86e4de --- /dev/null +++ b/qa/overrides/more-active-recovery.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_recovery_max_active: 10 + osd_recovery_max_single_start: 10 diff --git a/qa/overrides/no_client_pidfile.yaml b/qa/overrides/no_client_pidfile.yaml new file mode 100644 index 000000000..4ea02f476 --- /dev/null +++ b/qa/overrides/no_client_pidfile.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + pid file: "" diff --git a/qa/overrides/nvme_loop.yaml b/qa/overrides/nvme_loop.yaml new file mode 100644 index 000000000..75f6cd454 --- /dev/null +++ b/qa/overrides/nvme_loop.yaml @@ -0,0 +1,2 @@ +tasks: +- nvme_loop: diff --git a/qa/overrides/short_pg_log.yaml b/qa/overrides/short_pg_log.yaml new file mode 100644 index 000000000..fa55e91ea --- /dev/null +++ b/qa/overrides/short_pg_log.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 + osd_pg_log_trim_min: 0 diff --git a/qa/packages/packages.yaml b/qa/packages/packages.yaml new file mode 100644 index 000000000..866d59e6f --- /dev/null +++ b/qa/packages/packages.yaml @@ -0,0 +1,85 @@ +--- +ceph: + deb: + - ceph + - cephadm + - ceph-mds + - ceph-mgr + - ceph-common + - ceph-fuse + - ceph-test + - ceph-volume + - radosgw + - python3-rados + - python3-rgw + - python3-cephfs + - python3-rbd + - libcephfs2 + - libcephfs-dev + - librados2 + - librbd1 + - rbd-fuse + - ceph-common-dbg + - ceph-fuse-dbg + - ceph-mds-dbg + - ceph-mgr-dbg + - ceph-mon-dbg + - ceph-osd-dbg + - ceph-test-dbg + - libcephfs2-dbg + - librados2-dbg + - libradosstriper1-dbg + - librbd1-dbg + - librgw2-dbg + - radosgw-dbg + - rbd-fuse-dbg + - rbd-mirror-dbg + - rbd-nbd-dbg + rpm: + - ceph-radosgw + - ceph-test + - ceph + - ceph-base + - cephadm + - ceph-immutable-object-cache + - ceph-mgr + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-fuse + - ceph-volume + - librados-devel + - libcephfs2 + - libcephfs-devel + - librados2 + - librbd1 + - python3-rados + - python3-rgw + - python3-cephfs + - python3-rbd + - rbd-fuse + - rbd-mirror + - rbd-nbd + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo diff --git a/qa/qa_scripts/cephscrub.sh b/qa/qa_scripts/cephscrub.sh new file mode 100755 index 000000000..331d5ce32 --- /dev/null +++ b/qa/qa_scripts/cephscrub.sh @@ -0,0 +1,30 @@ +# remove the ceph directories +sudo rm -rf /var/log/ceph +sudo rm -rf /var/lib/ceph +sudo rm -rf /etc/ceph +sudo rm -rf /var/run/ceph +# remove the ceph packages +sudo apt-get -y purge ceph +sudo apt-get -y purge ceph-dbg +sudo apt-get -y purge ceph-mds +sudo apt-get -y purge ceph-mds-dbg +sudo apt-get -y purge ceph-fuse +sudo apt-get -y purge ceph-fuse-dbg +sudo apt-get -y purge ceph-common +sudo apt-get -y purge ceph-common-dbg +sudo apt-get -y purge ceph-resource-agents +sudo apt-get -y purge librados2 
+sudo apt-get -y purge librados2-dbg +sudo apt-get -y purge librados-dev +sudo apt-get -y purge librbd1 +sudo apt-get -y purge librbd1-dbg +sudo apt-get -y purge librbd-dev +sudo apt-get -y purge libcephfs2 +sudo apt-get -y purge libcephfs2-dbg +sudo apt-get -y purge libcephfs-dev +sudo apt-get -y purge radosgw +sudo apt-get -y purge radosgw-dbg +sudo apt-get -y purge obsync +sudo apt-get -y purge python-rados +sudo apt-get -y purge python-rbd +sudo apt-get -y purge python-cephfs diff --git a/qa/qa_scripts/openstack/README b/qa/qa_scripts/openstack/README new file mode 100644 index 000000000..63fe2d973 --- /dev/null +++ b/qa/qa_scripts/openstack/README @@ -0,0 +1,32 @@ +This directory contains scripts to quickly bring up an OpenStack instance, +attach a ceph cluster, create a nova compute node, and store the associated glance images, cinder volumes, nova vm, and cinder backup on ceph via rbd. + +execs is a directory that contains executables that are copied and remotely +run on the OpenStack instance + +files is a directory that contains templates used to initialize OpenStack +conf files. These templates reflect the state of these conf files on 5/17/2016. +If further development is necessary in the future, these templates should +probably be removed and direct editing of the OpenStack conf files should +probably be performed. + +These scripts also assume that either there is a rhel iso file named +rhel-server-7.2-x86_64-boot.iso in the user's home directory, or the +exported variable RHEL_ISO is set to point at an existing rhel iso file. +If one is also running the ceph-deploy based ceph_install.sh, this script +also assumes that there is a file named rhceph-1.3.1-rhel-7-x86_64-dvd.iso +in the files directory. These iso files can be obtained from the rhel site +and are not stored with these scripts. + +To install openstack: +./openstack.sh <openstack-admin-node> <ceph-monitor-node> + +This assumes that the ceph cluster is already set up. + +To setup a ceph-cluster using an iso and ceph-deploy: +./ceph_install.sh <admin-node> <mon-node> <osd-node> <osd-node> <osd-node> + +To setup a ceph-cluster using the cdn and ceph-ansible: +cd ceph_install_w_ansible +./ceph_install.sh <admin-node> <mon-node> <osd-node> <osd-node> <osd-node> + diff --git a/qa/qa_scripts/openstack/ceph_install.sh b/qa/qa_scripts/openstack/ceph_install.sh new file mode 100755 index 000000000..47831bd03 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# +# Install a simple ceph cluster upon which openstack images will be stored. +# +set -fv +ceph_node=${1} +source copy_func.sh +copy_file files/$OS_CEPH_ISO $ceph_node . +copy_file execs/ceph_cluster.sh $ceph_node . 0777 +copy_file execs/ceph-pool-create.sh $ceph_node . 0777 +ssh $ceph_node ./ceph_cluster.sh $* diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/README b/qa/qa_scripts/openstack/ceph_install_w_ansible/README new file mode 100644 index 000000000..282c46e48 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/README @@ -0,0 +1,32 @@ + +ceph_install.sh installs a ceph cluster using the cdn and ceph-ansible. + +Right now, it takes 5 parameters -- an admin node, a ceph mon node, and +three osd nodes. 
+ +In order to subscribe to the cdn, in your home directory create a file named +secrets, (~/secrets), that contains the following lines: + +subscrname=Your-Redhat-Cdn-Id +subscrpassword=Your-Redhat-Cdn-Password + +If you want to set the monitor_interface or the public_network values, +in your home directory create a file named ip_info (~/ip_info), that +contains the following lines: + +mon_intf=your-monitor-interface (default is eno1) +pub_netw=public-network (default is 10.8.128.0/21) + +This script first subscribes to the cdn, enables the rhel 7 repos, and does +a yum update. (multi_action.sh performs all the actions on all nodes at once, +staller.sh is used to make sure that all updates are complete before exiting, +and execs/cdn_setup.sh is used to remotely update the cdn information. + +After that, it makes sure that all nodes can connect via passwordless ssh +(using talknice.sh and config) and then installs the appropriate repos and +runs ceph_ansible on the admin node using execs/ceph_ansible.sh, +execs/edit_ansible_hosts.sh and execs/edit_groupvars_osds.sh. + +repolocs.sh contains the locations of repo files. These variables can +be changed if one wishes to use different urls. + diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/ceph_install.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/ceph_install.sh new file mode 100755 index 000000000..b4d14f9c2 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/ceph_install.sh @@ -0,0 +1,39 @@ +#! /usr/bin/env bash +if [ $# -ne 5 ]; then + echo 'Usage: ceph_install.sh <admin-node> <mon-node> <osd-node> <osd-node> <osd-node>' + exit -1 +fi +allnodes=$* +adminnode=$1 +shift +cephnodes=$* +monnode=$1 +shift +osdnodes=$* +./multi_action.sh cdn_setup.sh $allnodes +./talknice.sh $allnodes +for mac in $allnodes; do + ssh $mac sudo yum -y install yum-utils +done + +source ./repolocs.sh +ssh $adminnode sudo yum-config-manager --add ${CEPH_REPO_TOOLS} +ssh $monnode sudo yum-config-manager --add ${CEPH_REPO_MON} +for mac in $osdnodes; do + ssh $mac sudo yum-config-manager --add ${CEPH_REPO_OSD} +done +ssh $adminnode sudo yum-config-manager --add ${INSTALLER_REPO_LOC} + +for mac in $allnodes; do + ssh $mac sudo sed -i 's/gpgcheck=1/gpgcheck=0/' /etc/yum.conf +done + +source copy_func.sh +copy_file execs/ceph_ansible.sh $adminnode . 0777 ubuntu:ubuntu +copy_file execs/edit_ansible_hosts.sh $adminnode . 0777 ubuntu:ubuntu +copy_file execs/edit_groupvars_osds.sh $adminnode . 0777 ubuntu:ubuntu +copy_file ../execs/ceph-pool-create.sh $monnode . 0777 ubuntu:ubuntu +if [ -e ~/ip_info ]; then + copy_file ~/ip_info $adminnode . 0777 ubuntu:ubuntu +fi +ssh $adminnode ./ceph_ansible.sh $cephnodes diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/config b/qa/qa_scripts/openstack/ceph_install_w_ansible/config new file mode 100644 index 000000000..a7d819869 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/config @@ -0,0 +1,5 @@ +Host plana* mira* burnupi* tala* saya* vpm* names* gitbuilder* teuthology gw* senta* vercoi* rex* magna* + ServerAliveInterval 360 + StrictHostKeyChecking no + UserKnownHostsFile=/dev/null + User ubuntu diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/copy_func.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/copy_func.sh new file mode 120000 index 000000000..6a36be7b0 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/copy_func.sh @@ -0,0 +1 @@ +../copy_func.sh
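Matching the usage string that ceph_install.sh checks for, a typical invocation would look like the lines below; the host names are placeholders, and ~/secrets (plus, optionally, ~/ip_info) must already exist as described in the README above:

# admin node first, then the mon, then three osd hosts
./ceph_install.sh admin1.example.com mon1.example.com \
    osd1.example.com osd2.example.com osd3.example.com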
\ No newline at end of file diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/cdn_setup.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/cdn_setup.sh new file mode 100755 index 000000000..0c87039db --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/cdn_setup.sh @@ -0,0 +1,20 @@ +#! /usr/bin/env bash +if [ -f ~/secrets ]; then + source ~/secrets +fi +subm=`which subscription-manager` +if [ ${#subm} -eq 0 ]; then + sudo yum -y update + exit +fi +subst=`sudo subscription-manager status | grep "^Overall" | awk '{print $NF}'` +if [ $subst == 'Unknown' ]; then + mynameis=${subscrname:-'inigomontoya'} + mypassis=${subscrpassword:-'youkeelmyfatherpreparetodie'} + sudo subscription-manager register --username=$mynameis --password=$mypassis --force + sudo subscription-manager refresh + if [ $? -eq 1 ]; then exit 1; fi + sudo subscription-manager attach --pool=8a85f9823e3d5e43013e3ddd4e2a0977 +fi +sudo subscription-manager repos --enable=rhel-7-server-rpms +sudo yum -y update diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/ceph_ansible.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/ceph_ansible.sh new file mode 100755 index 000000000..8581de601 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/ceph_ansible.sh @@ -0,0 +1,36 @@ +#! /usr/bin/env bash +cephnodes=$* +monnode=$1 +sudo yum -y install ceph-ansible +cd +sudo ./edit_ansible_hosts.sh $cephnodes +mkdir ceph-ansible-keys +cd /usr/share/ceph-ansible/group_vars/ +if [ -f ~/ip_info ]; then + source ~/ip_info +fi +mon_intf=${mon_intf:-'eno1'} +pub_netw=${pub_netw:-'10.8.128.0\/21'} +sudo cp all.sample all +sudo sed -i 's/#ceph_origin:.*/ceph_origin: distro/' all +sudo sed -i 's/#fetch_directory:.*/fetch_directory: ~\/ceph-ansible-keys/' all +sudo sed -i 's/#ceph_stable:.*/ceph_stable: true/' all +sudo sed -i 's/#ceph_stable_rh_storage:.*/ceph_stable_rh_storage: false/' all +sudo sed -i 's/#ceph_stable_rh_storage_cdn_install:.*/ceph_stable_rh_storage_cdn_install: true/' all +sudo sed -i 's/#cephx:.*/cephx: true/' all +sudo sed -i "s/#monitor_interface:.*/monitor_interface: ${mon_intf}/" all +sudo sed -i 's/#journal_size:.*/journal_size: 1024/' all +sudo sed -i "s/#public_network:.*/public_network: ${pub_netw}/" all +sudo cp osds.sample osds +sudo sed -i 's/#fetch_directory:.*/fetch_directory: ~\/ceph-ansible-keys/' osds +sudo sed -i 's/#crush_location:/crush_location:/' osds +sudo sed -i 's/#osd_crush_location:/osd_crush_location:/' osds +sudo sed -i 's/#cephx:/cephx:/' osds +sudo sed -i 's/#devices:/devices:/' osds +sudo sed -i 's/#journal_collocation:.*/journal_collocation: true/' osds +cd +sudo ./edit_groupvars_osds.sh +cd /usr/share/ceph-ansible +sudo cp site.yml.sample site.yml +ansible-playbook site.yml +ssh $monnode ~/ceph-pool-create.sh diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_ansible_hosts.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_ansible_hosts.sh new file mode 100755 index 000000000..7eb0b7011 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_ansible_hosts.sh @@ -0,0 +1,17 @@ +#! /usr/bin/env bash +ed /etc/ansible/hosts << EOF +$ +a + +[mons] +${1} + +[osds] +${2} +${3} +${4} + +. 
+w +q +EOF diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_groupvars_osds.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_groupvars_osds.sh new file mode 100755 index 000000000..751658b09 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_groupvars_osds.sh @@ -0,0 +1,13 @@ +#! /usr/bin/env bash +ed /usr/share/ceph-ansible/group_vars/osds << EOF +$ +/^devices: +.+1 +i + - /dev/sdb + - /dev/sdc + - /dev/sdd +. +w +q +EOF diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/multi_action.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/multi_action.sh new file mode 100755 index 000000000..abc368b0a --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/multi_action.sh @@ -0,0 +1,19 @@ +#! /usr/bin/env bash +source copy_func.sh +allparms=$* +cmdv=$1 +shift +sites=$* +for mac in $sites; do + echo $cmdv $mac + if [ -f ~/secrets ]; then + copy_file ~/secrets $mac . 0777 ubuntu:ubuntu + fi + copy_file execs/${cmdv} $mac . 0777 ubuntu:ubuntu + ssh $mac ./${cmdv} & +done +./staller.sh $allparms +for mac in $sites; do + ssh $mac sudo rm -rf secrets +done +echo "DONE" diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/repolocs.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/repolocs.sh new file mode 100755 index 000000000..5d82f35d9 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/repolocs.sh @@ -0,0 +1,8 @@ +#! /usr/bin/env bash +SPECIFIC_VERSION=latest-Ceph-2-RHEL-7 +#SPECIFIC_VERSION=Ceph-2-RHEL-7-20160630.t.0 +#SPECIFIC_VERSION=Ceph-2.0-RHEL-7-20160718.t.0 +export CEPH_REPO_TOOLS=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/ceph-2-rhel-7-compose/${SPECIFIC_VERSION}/compose/Tools/x86_64/os/ +export CEPH_REPO_MON=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/ceph-2-rhel-7-compose/${SPECIFIC_VERSION}/compose/MON/x86_64/os/ +export CEPH_REPO_OSD=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/ceph-2-rhel-7-compose/${SPECIFIC_VERSION}/compose/OSD/x86_64/os/ +export INSTALLER_REPO_LOC=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/rhscon-2-rhel-7-compose/latest-RHSCON-2-RHEL-7/compose/Installer/x86_64/os/ diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/staller.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/staller.sh new file mode 100755 index 000000000..99c00da33 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/staller.sh @@ -0,0 +1,15 @@ +#! 
/usr/bin/env bash +cmd_wait=$1 +shift +sites=$* +donebit=0 +while [ $donebit -ne 1 ]; do + sleep 10 + donebit=1 + for rem in $sites; do + rval=`ssh $rem ps aux | grep $cmd_wait | wc -l` + if [ $rval -gt 0 ]; then + donebit=0 + fi + done +done diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/talknice.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/talknice.sh new file mode 100755 index 000000000..ffed4f1dc --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/talknice.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +declare -A rsapub +for fulln in $*; do + sname=`echo $fulln | sed 's/\..*//'` + nhead=`echo $sname | sed 's/[0-9]*//g'` + x=`ssh $fulln "ls .ssh/id_rsa"` + if [ -z $x ]; then + ssh $fulln "ssh-keygen -N '' -f .ssh/id_rsa"; + fi + xx=`ssh $fulln "ls .ssh/config"` + if [ -z $xx ]; then + scp config $fulln:/home/ubuntu/.ssh/config + fi + ssh $fulln "chown ubuntu:ubuntu .ssh/config" + ssh $fulln "chmod 0600 .ssh/config" + rsapub[$fulln]=`ssh $fulln "cat .ssh/id_rsa.pub"` +done +for ii in $*; do + ssh $ii sudo iptables -F + for jj in $*; do + pval=${rsapub[$jj]} + if [ "$ii" != "$jj" ]; then + xxxx=`ssh $ii "grep $jj .ssh/authorized_keys"` + if [ -z "$xxxx" ]; then + ssh $ii "echo '$pval' | sudo tee -a /home/ubuntu/.ssh/authorized_keys" + fi + fi + done; +done diff --git a/qa/qa_scripts/openstack/connectceph.sh b/qa/qa_scripts/openstack/connectceph.sh new file mode 100755 index 000000000..d975daada --- /dev/null +++ b/qa/qa_scripts/openstack/connectceph.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# +# Connect openstack node just installed to a ceph cluster. +# +# Essentially implements: +# +# http://docs.ceph.com/en/latest/rbd/rbd-openstack/ +# +# The directory named files contains templates for the /etc/glance/glance-api.conf, +# /etc/cinder/cinder.conf, /etc/nova/nova.conf Openstack files +# +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} + +scp $ceph_node:/etc/ceph/ceph.conf ./ceph.conf +ssh $openstack_node sudo mkdir /etc/ceph +copy_file ceph.conf $openstack_node /etc/ceph 0644 +rm -f ceph.conf +ssh $openstack_node sudo yum -y install python-rbd +ssh $openstack_node sudo yum -y install ceph-common +ssh $ceph_node "sudo ceph auth get-or-create client.cinder mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rwx pool=vms, allow rx pool=images'" +ssh $ceph_node "sudo ceph auth get-or-create client.glance mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=images'" +ssh $ceph_node "sudo ceph auth get-or-create client.cinder-backup mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=backups'" +ssh $ceph_node sudo ceph auth get-or-create client.glance mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=images' +ssh $ceph_node sudo ceph auth get-or-create client.cinder-backup mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=backups' +ssh $ceph_node sudo ceph auth get-or-create client.glance | ssh $openstack_node sudo tee /etc/ceph/ceph.client.glance.keyring +ssh $openstack_node sudo chown glance:glance /etc/ceph/ceph.client.glance.keyring +ssh $ceph_node sudo ceph auth get-or-create client.cinder | ssh $openstack_node sudo tee /etc/ceph/ceph.client.cinder.keyring +ssh $openstack_node sudo chown cinder:cinder /etc/ceph/ceph.client.cinder.keyring +ssh $ceph_node sudo ceph auth get-or-create client.cinder-backup | ssh $openstack_node sudo tee 
/etc/ceph/ceph.client.cinder-backup.keyring +ssh $openstack_node sudo chown cinder:cinder /etc/ceph/ceph.client.cinder-backup.keyring +ssh $ceph_node sudo ceph auth get-key client.cinder | ssh $openstack_node tee client.cinder.key +copy_file execs/libvirt-secret.sh $openstack_node . +secret_msg=`ssh $openstack_node sudo ./libvirt-secret.sh $openstack_node` +secret_virt=`echo $secret_msg | sed 's/.* set //'` +echo $secret_virt +fix_conf_file $openstack_node glance-api /etc/glance +fix_conf_file $openstack_node cinder /etc/cinder $secret_virt +fix_conf_file $openstack_node nova /etc/nova $secret_virt +copy_file execs/start_openstack.sh $openstack_node . 0755 +ssh $openstack_node ./start_openstack.sh diff --git a/qa/qa_scripts/openstack/copy_func.sh b/qa/qa_scripts/openstack/copy_func.sh new file mode 100755 index 000000000..571980262 --- /dev/null +++ b/qa/qa_scripts/openstack/copy_func.sh @@ -0,0 +1,22 @@ +# +# copy_file(<filename>, <node>, <directory>, [<permissions>], [<owner>] +# +# copy a file -- this is needed because passwordless ssh does not +# work when sudo'ing. +# <file> -- name of local file to be copied +# <node> -- node where we want the file +# <directory> -- location where we want the file on <node> +# <permissions> -- (optional) permissions on the copied file +# <owner> -- (optional) owner of the copied file +# +function copy_file() { + fname=`basename ${1}` + scp ${1} ${2}:/tmp/${fname} + ssh ${2} sudo cp /tmp/${fname} ${3} + if [ $# -gt 3 ]; then + ssh ${2} sudo chmod ${4} ${3}/${fname} + fi + if [ $# -gt 4 ]; then + ssh ${2} sudo chown ${5} ${3}/${fname} + fi +} diff --git a/qa/qa_scripts/openstack/execs/ceph-pool-create.sh b/qa/qa_scripts/openstack/execs/ceph-pool-create.sh new file mode 100755 index 000000000..723c83069 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/ceph-pool-create.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -f + +# +# On the ceph site, make the pools required for Openstack +# + +# +# Make a pool, if it does not already exist. +# +function make_pool { + if [[ -z `sudo ceph osd lspools | grep " $1,"` ]]; then + echo "making $1" + sudo ceph osd pool create $1 128 + fi +} + +# +# Make sure the pg_num and pgp_num values are good. 
+# +count=`sudo ceph osd pool get rbd pg_num | sed 's/pg_num: //'` +while [ $count -lt 128 ]; do + sudo ceph osd pool set rbd pg_num $count + count=`expr $count + 32` + sleep 30 +done +sudo ceph osd pool set rbd pg_num 128 +sleep 30 +sudo ceph osd pool set rbd pgp_num 128 +sleep 30 +make_pool volumes +make_pool images +make_pool backups +make_pool vms diff --git a/qa/qa_scripts/openstack/execs/ceph_cluster.sh b/qa/qa_scripts/openstack/execs/ceph_cluster.sh new file mode 100755 index 000000000..5afb3c787 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/ceph_cluster.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -f + +echo $OS_CEPH_ISO +if [[ $# -ne 4 ]]; then + echo "Usage: ceph_cluster mon.0 osd.0 osd.1 osd.2" + exit -1 +fi +allsites=$* +mon=$1 +shift +osds=$* +ISOVAL=${OS_CEPH_ISO-rhceph-1.3.1-rhel-7-x86_64-dvd.iso} +sudo mount -o loop ${ISOVAL} /mnt + +fqdn=`hostname -f` +lsetup=`ls /mnt/Installer | grep "^ice_setup"` +sudo yum -y install /mnt/Installer/${lsetup} +sudo ice_setup -d /mnt << EOF +yes +/mnt +$fqdn +http +EOF +ceph-deploy new ${mon} +ceph-deploy install --repo --release=ceph-mon ${mon} +ceph-deploy install --repo --release=ceph-osd ${allsites} +ceph-deploy install --mon ${mon} +ceph-deploy install --osd ${allsites} +ceph-deploy mon create-initial +sudo service ceph -a start osd +for d in b c d; do + for m in $osds; do + ceph-deploy disk zap ${m}:sd${d} + done + for m in $osds; do + ceph-deploy osd prepare ${m}:sd${d} + done + for m in $osds; do + ceph-deploy osd activate ${m}:sd${d}1:sd${d}2 + done +done + +sudo ./ceph-pool-create.sh + +hchk=`sudo ceph health` +while [[ $hchk != 'HEALTH_OK' ]]; do + sleep 30 + hchk=`sudo ceph health` +done diff --git a/qa/qa_scripts/openstack/execs/libvirt-secret.sh b/qa/qa_scripts/openstack/execs/libvirt-secret.sh new file mode 100755 index 000000000..75e9e91a0 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/libvirt-secret.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -f + +# +# Generate a libvirt secret on the Openstack node. +# +openstack_node=${1} +uuid=`uuidgen` +cat > secret.xml <<EOF +<secret ephemeral='no' private='no'> + <uuid>${uuid}</uuid> + <usage type='ceph'> + <name>client.cinder secret</name> + </usage> +</secret> +EOF +sudo virsh secret-define --file secret.xml +sudo virsh secret-set-value --secret ${uuid} --base64 $(cat client.cinder.key) +echo ${uuid} diff --git a/qa/qa_scripts/openstack/execs/openstack-preinstall.sh b/qa/qa_scripts/openstack/execs/openstack-preinstall.sh new file mode 100755 index 000000000..a2b235e76 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/openstack-preinstall.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -f + +# +# Remotely setup the stuff needed to run packstack. 
This should do items 1-4 in +# https://docs.google.com/document/d/1us18KR3LuLyINgGk2rmI-SVj9UksCE7y4C2D_68Aa8o/edit?ts=56a78fcb +# +yum remove -y rhos-release +rpm -ivh http://rhos-release.virt.bos.redhat.com/repos/rhos-release/rhos-release-latest.noarch.rpm +rm -rf /etc/yum.repos.d/* +rm -rf /var/cache/yum/* +rhos-release 8 +yum update -y +yum install -y nc puppet vim screen setroubleshoot crudini bpython openstack-packstack +systemctl disable ntpd +systemctl stop ntpd +reboot diff --git a/qa/qa_scripts/openstack/execs/run_openstack.sh b/qa/qa_scripts/openstack/execs/run_openstack.sh new file mode 100755 index 000000000..8764cbeb6 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/run_openstack.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -fv + +# +# Create a glance image, a corresponding cinder volume, a nova instance, attach, the cinder volume to the +# nova instance, and create a backup. +# +image_name=${1}X +file_name=${2-rhel-server-7.2-x86_64-boot.iso} +source ./keystonerc_admin +glance image-create --name $image_name --disk-format iso --container-format bare --file $file_name +glance_id=`glance image-list | grep ${image_name} | sed 's/^| //' | sed 's/ |.*//'` +cinder create --image-id ${glance_id} --display-name ${image_name}-volume 8 +nova boot --image ${image_name} --flavor 1 ${image_name}-inst +cinder_id=`cinder list | grep ${image_name} | sed 's/^| //' | sed 's/ |.*//'` +chkr=`cinder list | grep ${image_name}-volume | grep available` +while [ -z "$chkr" ]; do + sleep 30 + chkr=`cinder list | grep ${image_name}-volume | grep available` +done +nova volume-attach ${image_name}-inst ${cinder_id} auto +sleep 30 +cinder backup-create --name ${image_name}-backup ${image_name}-volume --force diff --git a/qa/qa_scripts/openstack/execs/start_openstack.sh b/qa/qa_scripts/openstack/execs/start_openstack.sh new file mode 100755 index 000000000..f5f12fe50 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/start_openstack.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -fv + +# +# start the Openstack services +# +sudo cp /root/keystonerc_admin ./keystonerc_admin +sudo chmod 0644 ./keystonerc_admin +source ./keystonerc_admin +sudo service httpd stop +sudo service openstack-keystone restart +sudo service openstack-glance-api restart +sudo service openstack-nova-compute restart +sudo service openstack-cinder-volume restart +sudo service openstack-cinder-backup restart diff --git a/qa/qa_scripts/openstack/files/cinder.template.conf b/qa/qa_scripts/openstack/files/cinder.template.conf new file mode 100644 index 000000000..807125ac3 --- /dev/null +++ b/qa/qa_scripts/openstack/files/cinder.template.conf @@ -0,0 +1,3481 @@ +[DEFAULT] + +# +# From cinder +# + +# Backup metadata version to be used when backing up volume metadata. If this +# number is bumped, make sure the service doing the restore supports the new +# version. 
(integer value) +#backup_metadata_version = 2 + +# The number of chunks or objects, for which one Ceilometer notification will +# be sent (integer value) +#backup_object_number_per_notification = 10 + +# Interval, in seconds, between two progress notifications reporting the backup +# status (integer value) +#backup_timer_interval = 120 + +# The maximum number of items that a collection resource returns in a single +# response (integer value) +#osapi_max_limit = 1000 + +# Base URL that will be presented to users in links to the OpenStack Volume API +# (string value) +# Deprecated group/name - [DEFAULT]/osapi_compute_link_prefix +#osapi_volume_base_URL = <None> + +# Ceph configuration file to use. (string value) +#backup_ceph_conf = /etc/ceph/ceph.conf +backup_ceph_conf = /etc/ceph/ceph.conf + +# The Ceph user to connect with. Default here is to use the same user as for +# Cinder volumes. If not using cephx this should be set to None. (string value) +#backup_ceph_user = cinder +backup_ceph_user = cinder-backup + +# The chunk size, in bytes, that a backup is broken into before transfer to the +# Ceph object store. (integer value) +#backup_ceph_chunk_size = 134217728 +backup_ceph_chunk_size = 134217728 + +# The Ceph pool where volume backups are stored. (string value) +#backup_ceph_pool = backups +backup_ceph_pool = backups + +# RBD stripe unit to use when creating a backup image. (integer value) +#backup_ceph_stripe_unit = 0 +backup_ceph_stripe_unit = 0 + +# RBD stripe count to use when creating a backup image. (integer value) +#backup_ceph_stripe_count = 0 +backup_ceph_stripe_count = 0 + +# If True, always discard excess bytes when restoring volumes i.e. pad with +# zeroes. (boolean value) +#restore_discard_excess_bytes = true +restore_discard_excess_bytes = true + +# File with the list of available smbfs shares. (string value) +#smbfs_shares_config = /etc/cinder/smbfs_shares + +# Default format that will be used when creating volumes if no volume format is +# specified. (string value) +# Allowed values: raw, qcow2, vhd, vhdx +#smbfs_default_volume_format = qcow2 + +# Create volumes as sparsed files which take no space rather than regular files +# when using raw format, in which case volume creation takes lot of time. +# (boolean value) +#smbfs_sparsed_volumes = true + +# Percent of ACTUAL usage of the underlying volume before no new volumes can be +# allocated to the volume destination. (floating point value) +#smbfs_used_ratio = 0.95 + +# This will compare the allocated to available space on the volume destination. +# If the ratio exceeds this number, the destination will no longer be valid. +# (floating point value) +#smbfs_oversub_ratio = 1.0 + +# Base dir containing mount points for smbfs shares. (string value) +#smbfs_mount_point_base = $state_path/mnt + +# Mount options passed to the smbfs client. See mount.cifs man page for +# details. (string value) +#smbfs_mount_options = noperm,file_mode=0775,dir_mode=0775 + +# Compression algorithm (None to disable) (string value) +#backup_compression_algorithm = zlib + +# Use thin provisioning for SAN volumes? 
(boolean value) +#san_thin_provision = true + +# IP address of SAN controller (string value) +#san_ip = + +# Username for SAN controller (string value) +#san_login = admin + +# Password for SAN controller (string value) +#san_password = + +# Filename of private key to use for SSH authentication (string value) +#san_private_key = + +# Cluster name to use for creating volumes (string value) +#san_clustername = + +# SSH port to use with SAN (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#san_ssh_port = 22 + +# Execute commands locally instead of over SSH; use if the volume service is +# running on the SAN device (boolean value) +#san_is_local = false + +# SSH connection timeout in seconds (integer value) +#ssh_conn_timeout = 30 + +# Minimum ssh connections in the pool (integer value) +#ssh_min_pool_conn = 1 + +# Maximum ssh connections in the pool (integer value) +#ssh_max_pool_conn = 5 + +# Configuration file for HDS NFS cinder plugin (string value) +#hds_hnas_nfs_config_file = /opt/hds/hnas/cinder_nfs_conf.xml + +# Global backend request timeout, in seconds. (integer value) +#violin_request_timeout = 300 + +# Option to enable strict host key checking. When set to "True" Cinder will +# only connect to systems with a host key present in the configured +# "ssh_hosts_key_file". When set to "False" the host key will be saved upon +# first connection and used for subsequent connections. Default=False (boolean +# value) +#strict_ssh_host_key_policy = false + +# File containing SSH host keys for the systems with which Cinder needs to +# communicate. OPTIONAL: Default=$state_path/ssh_known_hosts (string value) +#ssh_hosts_key_file = $state_path/ssh_known_hosts + +# The storage family type used on the storage system; valid values are +# ontap_7mode for using Data ONTAP operating in 7-Mode, ontap_cluster for using +# clustered Data ONTAP, or eseries for using E-Series. (string value) +# Allowed values: ontap_7mode, ontap_cluster, eseries +#netapp_storage_family = ontap_cluster + +# The storage protocol to be used on the data path with the storage system. +# (string value) +# Allowed values: iscsi, fc, nfs +#netapp_storage_protocol = <None> + +# The hostname (or IP address) for the storage system or proxy server. (string +# value) +#netapp_server_hostname = <None> + +# The TCP port to use for communication with the storage system or proxy +# server. If not specified, Data ONTAP drivers will use 80 for HTTP and 443 for +# HTTPS; E-Series will use 8080 for HTTP and 8443 for HTTPS. (integer value) +#netapp_server_port = <None> + +# The transport protocol used when communicating with the storage system or +# proxy server. (string value) +# Allowed values: http, https +#netapp_transport_type = http + +# Administrative user account name used to access the storage system or proxy +# server. (string value) +#netapp_login = <None> + +# Password for the administrative user account specified in the netapp_login +# option. (string value) +#netapp_password = <None> + +# This option specifies the virtual storage server (Vserver) name on the +# storage cluster on which provisioning of block storage volumes should occur. +# (string value) +#netapp_vserver = <None> + +# The vFiler unit on which provisioning of block storage volumes will be done. +# This option is only used by the driver when connecting to an instance with a +# storage family of Data ONTAP operating in 7-Mode. Only use this option when +# utilizing the MultiStore feature on the NetApp storage system. 
(string value) +#netapp_vfiler = <None> + +# The name of the config.conf stanza for a Data ONTAP (7-mode) HA partner. +# This option is only used by the driver when connecting to an instance with a +# storage family of Data ONTAP operating in 7-Mode, and it is required if the +# storage protocol selected is FC. (string value) +#netapp_partner_backend_name = <None> + +# The quantity to be multiplied by the requested volume size to ensure enough +# space is available on the virtual storage server (Vserver) to fulfill the +# volume creation request. Note: this option is deprecated and will be removed +# in favor of "reserved_percentage" in the Mitaka release. (floating point +# value) +#netapp_size_multiplier = 1.2 + +# This option determines if storage space is reserved for LUN allocation. If +# enabled, LUNs are thick provisioned. If space reservation is disabled, +# storage space is allocated on demand. (string value) +# Allowed values: enabled, disabled +#netapp_lun_space_reservation = enabled + +# If the percentage of available space for an NFS share has dropped below the +# value specified by this option, the NFS image cache will be cleaned. (integer +# value) +#thres_avl_size_perc_start = 20 + +# When the percentage of available space on an NFS share has reached the +# percentage specified by this option, the driver will stop clearing files from +# the NFS image cache that have not been accessed in the last M minutes, where +# M is the value of the expiry_thres_minutes configuration option. (integer +# value) +#thres_avl_size_perc_stop = 60 + +# This option specifies the threshold for last access time for images in the +# NFS image cache. When a cache cleaning cycle begins, images in the cache that +# have not been accessed in the last M minutes, where M is the value of this +# parameter, will be deleted from the cache to create free space on the NFS +# share. (integer value) +#expiry_thres_minutes = 720 + +# This option is used to specify the path to the E-Series proxy application on +# a proxy server. The value is combined with the value of the +# netapp_transport_type, netapp_server_hostname, and netapp_server_port options +# to create the URL used by the driver to connect to the proxy application. +# (string value) +#netapp_webservice_path = /devmgr/v2 + +# This option is only utilized when the storage family is configured to +# eseries. This option is used to restrict provisioning to the specified +# controllers. Specify the value of this option to be a comma separated list of +# controller hostnames or IP addresses to be used for provisioning. (string +# value) +#netapp_controller_ips = <None> + +# Password for the NetApp E-Series storage array. (string value) +#netapp_sa_password = <None> + +# This option specifies whether the driver should allow operations that require +# multiple attachments to a volume. An example would be live migration of +# servers that have volumes attached. When enabled, this backend is limited to +# 256 total volumes in order to guarantee volumes can be accessed by more than +# one host. (boolean value) +#netapp_enable_multiattach = false + +# This option specifies the path of the NetApp copy offload tool binary. Ensure +# that the binary has execute permissions set which allow the effective user of +# the cinder-volume process to execute the file. 
(string value) +#netapp_copyoffload_tool_path = <None> + +# This option defines the type of operating system that will access a LUN +# exported from Data ONTAP; it is assigned to the LUN at the time it is +# created. (string value) +#netapp_lun_ostype = <None> + +# This option defines the type of operating system for all initiators that can +# access a LUN. This information is used when mapping LUNs to individual hosts +# or groups of hosts. (string value) +# Deprecated group/name - [DEFAULT]/netapp_eseries_host_type +#netapp_host_type = <None> + +# This option is used to restrict provisioning to the specified pools. Specify +# the value of this option to be a regular expression which will be applied to +# the names of objects from the storage backend which represent pools in +# Cinder. This option is only utilized when the storage protocol is configured +# to use iSCSI or FC. (string value) +# Deprecated group/name - [DEFAULT]/netapp_volume_list +# Deprecated group/name - [DEFAULT]/netapp_storage_pools +#netapp_pool_name_search_pattern = (.+) + +# Base dir containing mount point for gluster share. (string value) +#glusterfs_backup_mount_point = $state_path/backup_mount + +# GlusterFS share in <hostname|ipv4addr|ipv6addr>:<gluster_vol_name> format. +# Eg: 1.2.3.4:backup_vol (string value) +#glusterfs_backup_share = <None> + +# Volume prefix for the backup id when backing up to TSM (string value) +#backup_tsm_volume_prefix = backup + +# TSM password for the running username (string value) +#backup_tsm_password = password + +# Enable or Disable compression for backups (boolean value) +#backup_tsm_compression = true + +# Request for FC Zone creating host group (boolean value) +#hpxp_zoning_request = false + +# Type of storage command line interface (string value) +#hpxp_storage_cli = <None> + +# ID of storage system (string value) +#hpxp_storage_id = <None> + +# Pool of storage system (string value) +#hpxp_pool = <None> + +# Thin pool of storage system (string value) +#hpxp_thin_pool = <None> + +# Logical device range of storage system (string value) +#hpxp_ldev_range = <None> + +# Default copy method of storage system. There are two valid values: "FULL" +# specifies that a full copy; "THIN" specifies that a thin copy. 
Default value +# is "FULL" (string value) +#hpxp_default_copy_method = FULL + +# Copy speed of storage system (integer value) +#hpxp_copy_speed = 3 + +# Interval to check copy (integer value) +#hpxp_copy_check_interval = 3 + +# Interval to check copy asynchronously (integer value) +#hpxp_async_copy_check_interval = 10 + +# Target port names for host group or iSCSI target (list value) +#hpxp_target_ports = <None> + +# Target port names of compute node for host group or iSCSI target (list value) +#hpxp_compute_target_ports = <None> + +# Request for creating host group or iSCSI target (boolean value) +#hpxp_group_request = false + +# Instance numbers for HORCM (list value) +#hpxp_horcm_numbers = 200,201 + +# Username of storage system for HORCM (string value) +#hpxp_horcm_user = <None> + +# Add to HORCM configuration (boolean value) +#hpxp_horcm_add_conf = true + +# Resource group name of storage system for HORCM (string value) +#hpxp_horcm_resource_name = meta_resource + +# Only discover a specific name of host group or iSCSI target (boolean value) +#hpxp_horcm_name_only_discovery = false + +# Storage system storage pool for volumes (string value) +#storwize_svc_volpool_name = volpool + +# Storage system space-efficiency parameter for volumes (percentage) (integer +# value) +# Minimum value: -1 +# Maximum value: 100 +#storwize_svc_vol_rsize = 2 + +# Storage system threshold for volume capacity warnings (percentage) (integer +# value) +# Minimum value: -1 +# Maximum value: 100 +#storwize_svc_vol_warning = 0 + +# Storage system autoexpand parameter for volumes (True/False) (boolean value) +#storwize_svc_vol_autoexpand = true + +# Storage system grain size parameter for volumes (32/64/128/256) (integer +# value) +#storwize_svc_vol_grainsize = 256 + +# Storage system compression option for volumes (boolean value) +#storwize_svc_vol_compression = false + +# Enable Easy Tier for volumes (boolean value) +#storwize_svc_vol_easytier = true + +# The I/O group in which to allocate volumes (integer value) +#storwize_svc_vol_iogrp = 0 + +# Maximum number of seconds to wait for FlashCopy to be prepared. (integer +# value) +# Minimum value: 1 +# Maximum value: 600 +#storwize_svc_flashcopy_timeout = 120 + +# Connection protocol (iSCSI/FC) (string value) +#storwize_svc_connection_protocol = iSCSI + +# Configure CHAP authentication for iSCSI connections (Default: Enabled) +# (boolean value) +#storwize_svc_iscsi_chap_enabled = true + +# Connect with multipath (FC only; iSCSI multipath is controlled by Nova) +# (boolean value) +#storwize_svc_multipath_enabled = false + +# Allows vdisk to multi host mapping (boolean value) +#storwize_svc_multihostmap_enabled = true + +# Indicate whether svc driver is compatible for NPIV setup. If it is +# compatible, it will allow no wwpns being returned on get_conn_fc_wwpns during +# initialize_connection. It should always be set to True. It will be deprecated +# and removed in M release. (boolean value) +#storwize_svc_npiv_compatibility_mode = true + +# Allow tenants to specify QOS on create (boolean value) +#storwize_svc_allow_tenant_qos = false + +# If operating in stretched cluster mode, specify the name of the pool in which +# mirrored copies are stored.Example: "pool2" (string value) +#storwize_svc_stretched_cluster_partner = <None> + +# Driver to use for backups. (string value) +#backup_driver = cinder.backup.drivers.swift +backup_driver = cinder.backup.drivers.ceph + +# Offload pending backup delete during backup service startup. 
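# With backup_driver pointed at cinder.backup.drivers.ceph as above, backups
# are written as RBD images into the pool named by backup_ceph_pool. A hedged
# smoke test using the standard cinder and rbd clients (the volume name 'vol1'
# is only a placeholder):
#
#   cinder backup-create --name vol1-backup vol1
#   cinder backup-list
#   rbd -p backups ls --id cinder-backup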
(boolean value) +#backup_service_inithost_offload = false + +# Make exception message format errors fatal. (boolean value) +#fatal_exception_format_errors = false + +# IP address of this host (string value) +#my_ip = 10.16.48.99 + +# Default glance host name or IP (string value) +#glance_host = $my_ip +glance_host = VARINET4ADDR + +# Default glance port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#glance_port = 9292 + +# A list of the glance API servers available to cinder ([hostname|ip]:port) +# (list value) +#glance_api_servers = $glance_host:$glance_port + +# Version of the glance API to use (integer value) +#glance_api_version = 1 + +# Number retries when downloading an image from glance (integer value) +#glance_num_retries = 0 + +# Allow to perform insecure SSL (https) requests to glance (boolean value) +#glance_api_insecure = false + +# Enables or disables negotiation of SSL layer compression. In some cases +# disabling compression can improve data throughput, such as when high network +# bandwidth is available and you use compressed image formats like qcow2. +# (boolean value) +#glance_api_ssl_compression = false + +# Location of ca certificates file to use for glance client requests. (string +# value) +#glance_ca_certificates_file = <None> + +# http/https timeout value for glance operations. If no value (None) is +# supplied here, the glanceclient default value is used. (integer value) +#glance_request_timeout = <None> + +# The topic that scheduler nodes listen on (string value) +#scheduler_topic = cinder-scheduler + +# The topic that volume nodes listen on (string value) +#volume_topic = cinder-volume + +# The topic that volume backup nodes listen on (string value) +#backup_topic = cinder-backup + +# DEPRECATED: Deploy v1 of the Cinder API. (boolean value) +#enable_v1_api = true +enable_v1_api = True + +# Deploy v2 of the Cinder API. (boolean value) +#enable_v2_api = true +enable_v2_api = True + +# Enables or disables rate limit of the API. (boolean value) +#api_rate_limit = true + +# Specify list of extensions to load when using osapi_volume_extension option +# with cinder.api.contrib.select_extensions (list value) +#osapi_volume_ext_list = + +# osapi volume extension to load (multi valued) +#osapi_volume_extension = cinder.api.contrib.standard_extensions + +# Full class name for the Manager for volume (string value) +#volume_manager = cinder.volume.manager.VolumeManager + +# Full class name for the Manager for volume backup (string value) +#backup_manager = cinder.backup.manager.BackupManager + +# Full class name for the Manager for scheduler (string value) +#scheduler_manager = cinder.scheduler.manager.SchedulerManager + +# Name of this node. This can be an opaque identifier. It is not necessarily a +# host name, FQDN, or IP address. (string value) +#host = x86-024.build.eng.bos.redhat.com +host = VARHOSTNAME + +# Availability zone of this node (string value) +#storage_availability_zone = nova +storage_availability_zone = nova + +# Default availability zone for new volumes. If not set, the +# storage_availability_zone option value is used as the default for new +# volumes. (string value) +#default_availability_zone = <None> +default_availability_zone = nova + +# If the requested Cinder availability zone is unavailable, fall back to the +# value of default_availability_zone, then storage_availability_zone, instead +# of failing. 
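# Worked example of the substitution used above: with glance_host set to
# VARINET4ADDR and glance_port left at its default of 9292, the default
# glance_api_servers value of $glance_host:$glance_port resolves to
# VARINET4ADDR:9292. Listing more than one API server explicitly would look
# like the following (the second address is purely illustrative):
#
#   glance_api_servers = VARINET4ADDR:9292,10.0.0.2:9292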
(boolean value) +#allow_availability_zone_fallback = false + +# Default volume type to use (string value) +#default_volume_type = <None> + +# Time period for which to generate volume usages. The options are hour, day, +# month, or year. (string value) +#volume_usage_audit_period = month + +# Path to the rootwrap configuration file to use for running commands as root +# (string value) +#rootwrap_config = /etc/cinder/rootwrap.conf + +# Enable monkey patching (boolean value) +#monkey_patch = false + +# List of modules/decorators to monkey patch (list value) +#monkey_patch_modules = + +# Maximum time since last check-in for a service to be considered up (integer +# value) +#service_down_time = 60 + +# The full class name of the volume API class to use (string value) +#volume_api_class = cinder.volume.api.API + +# The full class name of the volume backup API class (string value) +#backup_api_class = cinder.backup.api.API + +# The strategy to use for auth. Supports noauth, keystone, and deprecated. +# (string value) +# Allowed values: noauth, keystone, deprecated +#auth_strategy = keystone +auth_strategy = keystone + +# A list of backend names to use. These backend names should be backed by a +# unique [CONFIG] group with its options (list value) +#enabled_backends = <None> +enabled_backends = ceph + +# Whether snapshots count against gigabyte quota (boolean value) +#no_snapshot_gb_quota = false + +# The full class name of the volume transfer API class (string value) +#transfer_api_class = cinder.transfer.api.API + +# The full class name of the volume replication API class (string value) +#replication_api_class = cinder.replication.api.API + +# The full class name of the consistencygroup API class (string value) +#consistencygroup_api_class = cinder.consistencygroup.api.API + +# OpenStack privileged account username. Used for requests to other services +# (such as Nova) that require an account with special rights. (string value) +#os_privileged_user_name = <None> + +# Password associated with the OpenStack privileged account. (string value) +#os_privileged_user_password = <None> + +# Tenant name associated with the OpenStack privileged account. (string value) +#os_privileged_user_tenant = <None> + +# Auth URL associated with the OpenStack privileged account. (string value) +#os_privileged_user_auth_url = <None> + +# Multiplier used for weighing volume capacity. Negative numbers mean to stack +# vs spread. (floating point value) +#capacity_weight_multiplier = 1.0 + +# Multiplier used for weighing volume capacity. Negative numbers mean to stack +# vs spread. (floating point value) +#allocated_capacity_weight_multiplier = -1.0 + +# IP address of sheep daemon. (string value) +#sheepdog_store_address = 127.0.0.1 + +# Port of sheep daemon. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#sheepdog_store_port = 7000 + +# Specifies the path of the GPFS directory where Block Storage volume and +# snapshot files are stored. (string value) +#gpfs_mount_point_base = <None> + +# Specifies the path of the Image service repository in GPFS. Leave undefined +# if not storing images in GPFS. (string value) +#gpfs_images_dir = <None> + +# Specifies the type of image copy to be used. Set this when the Image service +# repository also uses GPFS so that image files can be transferred efficiently +# from the Image service to the Block Storage service. 
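# The enabled_backends = ceph setting above expects a matching [ceph] group
# with its own options elsewhere in this file. A minimal sketch of such a
# stanza, assuming the RBD driver and placeholder values rather than the ones
# actually used in this deployment:
#
#   [ceph]
#   volume_driver = cinder.volume.drivers.rbd.RBDDriver
#   volume_backend_name = ceph
#   rbd_pool = volumes
#   rbd_user = cinder
#   rbd_ceph_conf = /etc/ceph/ceph.conf
#   rbd_secret_uuid = <libvirt secret uuid>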
There are two valid +# values: "copy" specifies that a full copy of the image is made; +# "copy_on_write" specifies that copy-on-write optimization strategy is used +# and unmodified blocks of the image file are shared efficiently. (string +# value) +# Allowed values: copy, copy_on_write, <None> +#gpfs_images_share_mode = <None> + +# Specifies an upper limit on the number of indirections required to reach a +# specific block due to snapshots or clones. A lengthy chain of copy-on-write +# snapshots or clones can have a negative impact on performance, but improves +# space utilization. 0 indicates unlimited clone depth. (integer value) +#gpfs_max_clone_depth = 0 + +# Specifies that volumes are created as sparse files which initially consume no +# space. If set to False, the volume is created as a fully allocated file, in +# which case, creation may take a significantly longer time. (boolean value) +#gpfs_sparse_volumes = true + +# Specifies the storage pool that volumes are assigned to. By default, the +# system storage pool is used. (string value) +#gpfs_storage_pool = system + +# Set 512 byte emulation on volume creation; (boolean value) +#sf_emulate_512 = true + +# Allow tenants to specify QOS on create (boolean value) +#sf_allow_tenant_qos = false + +# Create SolidFire accounts with this prefix. Any string can be used here, but +# the string "hostname" is special and will create a prefix using the cinder +# node hostname (previous default behavior). The default is NO prefix. (string +# value) +#sf_account_prefix = <None> + +# Account name on the SolidFire Cluster to use as owner of template/cache +# volumes (created if does not exist). (string value) +#sf_template_account_name = openstack-vtemplate + +# Create an internal cache of copy of images when a bootable volume is created +# to eliminate fetch from glance and qemu-conversion on subsequent calls. +# (boolean value) +#sf_allow_template_caching = true + +# Overrides default cluster SVIP with the one specified. This is required or +# deployments that have implemented the use of VLANs for iSCSI networks in +# their cloud. (string value) +#sf_svip = <None> + +# Create an internal mapping of volume IDs and account. Optimizes lookups and +# performance at the expense of memory, very large deployments may want to +# consider setting to False. (boolean value) +#sf_enable_volume_mapping = true + +# SolidFire API port. Useful if the device api is behind a proxy on a different +# port. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#sf_api_port = 443 + +# IBMNAS platform type to be used as backend storage; valid values are - v7ku : +# for using IBM Storwize V7000 Unified, sonas : for using IBM Scale Out NAS, +# gpfs-nas : for using NFS based IBM GPFS deployments. (string value) +# Allowed values: v7ku, sonas, gpfs-nas +#ibmnas_platform_type = v7ku + +# The URL of the Swift endpoint (string value) +#backup_swift_url = <None> +backup_swift_url = http://VARINET4ADDR:8080/v1/AUTH_ + +# Info to match when looking for swift in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> - +# Only used if backup_swift_url is unset (string value) +#swift_catalog_info = object-store:swift:publicURL + +# Swift authentication mechanism (string value) +#backup_swift_auth = per_user + +# Swift authentication version. Specify "1" for auth 1.0, or "2" for auth 2.0 +# (string value) +#backup_swift_auth_version = 1 + +# Swift tenant/account name. 
Required when connecting to an auth 2.0 system +# (string value) +#backup_swift_tenant = <None> + +# Swift user name (string value) +#backup_swift_user = <None> + +# Swift key for authentication (string value) +#backup_swift_key = <None> + +# The default Swift container to use (string value) +#backup_swift_container = volumebackups +backup_swift_container = volumes_backup + +# The size in bytes of Swift backup objects (integer value) +#backup_swift_object_size = 52428800 + +# The size in bytes that changes are tracked for incremental backups. +# backup_swift_object_size has to be multiple of backup_swift_block_size. +# (integer value) +#backup_swift_block_size = 32768 + +# The number of retries to make for Swift operations (integer value) +#backup_swift_retry_attempts = 3 + +# The backoff time in seconds between Swift retries (integer value) +#backup_swift_retry_backoff = 2 + +# Enable or Disable the timer to send the periodic progress notifications to +# Ceilometer when backing up the volume to the Swift backend storage. The +# default value is True to enable the timer. (boolean value) +#backup_swift_enable_progress_timer = true + +# Location of the CA certificate file to use for swift client requests. (string +# value) +#backup_swift_ca_cert_file = <None> + +# These values will be used for CloudByte storage's addQos API call. (dict +# value) +#cb_add_qosgroup = graceallowed:false,iops:10,iopscontrol:true,latency:15,memlimit:0,networkspeed:0,throughput:0,tpcontrol:false + +# These values will be used for CloudByte storage's createVolume API call. +# (dict value) +#cb_create_volume = blocklength:512B,compression:off,deduplication:off,protocoltype:ISCSI,recordsize:16k,sync:always + +# Driver will use this API key to authenticate against the CloudByte storage's +# management interface. (string value) +#cb_apikey = <None> + +# CloudByte storage specific account name. This maps to a project name in +# OpenStack. (string value) +#cb_account_name = <None> + +# This corresponds to the name of Tenant Storage Machine (TSM) in CloudByte +# storage. A volume will be created in this TSM. (string value) +#cb_tsm_name = <None> + +# A retry value in seconds. Will be used by the driver to check if volume +# creation was successful in CloudByte storage. (integer value) +#cb_confirm_volume_create_retry_interval = 5 + +# Will confirm a successful volume creation in CloudByte storage by making this +# many number of attempts. (integer value) +#cb_confirm_volume_create_retries = 3 + +# A retry value in seconds. Will be used by the driver to check if volume +# deletion was successful in CloudByte storage. (integer value) +#cb_confirm_volume_delete_retry_interval = 5 + +# Will confirm a successful volume deletion in CloudByte storage by making this +# many number of attempts. (integer value) +#cb_confirm_volume_delete_retries = 3 + +# This corresponds to the discovery authentication group in CloudByte storage. +# Chap users are added to this group. Driver uses the first user found for this +# group. Default value is None. (string value) +#cb_auth_group = None + +# Interval, in seconds, between nodes reporting state to datastore (integer +# value) +#report_interval = 10 + +# Interval, in seconds, between running periodic tasks (integer value) +#periodic_interval = 60 + +# Range, in seconds, to randomly delay when starting the periodic task +# scheduler to reduce stampeding. 
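# Quick arithmetic check of the multiple-of constraint stated above: the
# defaults are self-consistent, since backup_swift_object_size /
# backup_swift_block_size = 52428800 / 32768 = 1600 exactly.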
(Disable by setting to 0) (integer value) +#periodic_fuzzy_delay = 60 + +# IP address on which OpenStack Volume API listens (string value) +#osapi_volume_listen = 0.0.0.0 +osapi_volume_listen = 0.0.0.0 + +# Port on which OpenStack Volume API listens (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#osapi_volume_listen_port = 8776 + +# Number of workers for OpenStack Volume API service. The default is equal to +# the number of CPUs available. (integer value) +#osapi_volume_workers = <None> +osapi_volume_workers = 12 + +# The full class name of the compute API class to use (string value) +#compute_api_class = cinder.compute.nova.API + +# Number of nodes that should replicate the data. (string value) +#drbdmanage_redundancy = 1 + +# Pool or Vdisk name to use for volume creation. (string value) +#dothill_backend_name = A + +# linear (for Vdisk) or virtual (for Pool). (string value) +# Allowed values: linear, virtual +#dothill_backend_type = virtual + +# DotHill API interface protocol. (string value) +# Allowed values: http, https +#dothill_api_protocol = https + +# Whether to verify DotHill array SSL certificate. (boolean value) +#dothill_verify_certificate = false + +# DotHill array SSL certificate path. (string value) +#dothill_verify_certificate_path = <None> + +# List of comma-separated target iSCSI IP addresses. (list value) +#dothill_iscsi_ips = + +# File with the list of available gluster shares (string value) +#glusterfs_shares_config = /etc/cinder/glusterfs_shares + +# Base dir containing mount points for gluster shares. (string value) +#glusterfs_mount_point_base = $state_path/mnt + +# REST API authorization token. (string value) +#pure_api_token = <None> + +# ID of the project which will be used as the Cinder internal tenant. (string +# value) +#cinder_internal_tenant_project_id = <None> + +# ID of the user to be used in volume operations as the Cinder internal tenant. +# (string value) +#cinder_internal_tenant_user_id = <None> + +# The scheduler host manager class to use (string value) +#scheduler_host_manager = cinder.scheduler.host_manager.HostManager + +# Maximum number of attempts to schedule an volume (integer value) +#scheduler_max_attempts = 3 + +# Path or URL to Scality SOFS configuration file (string value) +#scality_sofs_config = <None> + +# Base dir where Scality SOFS shall be mounted (string value) +#scality_sofs_mount_point = $state_path/scality + +# Path from Scality SOFS root to volume dir (string value) +#scality_sofs_volume_dir = cinder/volumes + +# VNX authentication scope type. (string value) +#storage_vnx_authentication_type = global + +# Directory path that contains the VNX security file. Make sure the security +# file is generated first. (string value) +#storage_vnx_security_file_dir = <None> + +# Naviseccli Path. (string value) +#naviseccli_path = + +# Comma-separated list of storage pool names to be used. (string value) +# Deprecated group/name - [DEFAULT]/storage_vnx_pool_name +#storage_vnx_pool_names = <None> + +# VNX secondary SP IP Address. (string value) +#san_secondary_ip = <None> + +# Default timeout for CLI operations in minutes. For example, LUN migration is +# a typical long running operation, which depends on the LUN size and the load +# of the array. An upper bound in the specific deployment can be set to avoid +# unnecessary long wait. By default, it is 365 days long. (integer value) +#default_timeout = 525600 + +# Default max number of LUNs in a storage group. By default, the value is 255. 
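# With the API bound to 0.0.0.0:8776 as configured above, a simple smoke test
# is to fetch the unauthenticated version document from the service root
# (assumes the API is reachable from this host):
#
#   curl -s http://VARINET4ADDR:8776/ | python -m json.tool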
+# (integer value) +#max_luns_per_storage_group = 255 + +# To destroy storage group when the last LUN is removed from it. By default, +# the value is False. (boolean value) +#destroy_empty_storage_group = false + +# Mapping between hostname and its iSCSI initiator IP addresses. (string value) +#iscsi_initiators = + +# Comma separated iSCSI or FC ports to be used in Nova or Cinder. (string +# value) +#io_port_list = * + +# Automatically register initiators. By default, the value is False. (boolean +# value) +#initiator_auto_registration = false + +# Automatically deregister initiators after the related storage group is +# destroyed. By default, the value is False. (boolean value) +#initiator_auto_deregistration = false + +# Report free_capacity_gb as 0 when the limit to maximum number of pool LUNs is +# reached. By default, the value is False. (boolean value) +#check_max_pool_luns_threshold = false + +# Delete a LUN even if it is in Storage Groups. (boolean value) +#force_delete_lun_in_storagegroup = false + +# Force LUN creation even if the full threshold of pool is reached. (boolean +# value) +#ignore_pool_full_threshold = false + +# IP address for connecting to VMware ESX/vCenter server. (string value) +#vmware_host_ip = <None> + +# Username for authenticating with VMware ESX/vCenter server. (string value) +#vmware_host_username = <None> + +# Password for authenticating with VMware ESX/vCenter server. (string value) +#vmware_host_password = <None> + +# Optional VIM service WSDL Location e.g http://<server>/vimService.wsdl. +# Optional over-ride to default location for bug work-arounds. (string value) +#vmware_wsdl_location = <None> + +# Number of times VMware ESX/vCenter server API must be retried upon connection +# related issues. (integer value) +#vmware_api_retry_count = 10 + +# The interval (in seconds) for polling remote tasks invoked on VMware +# ESX/vCenter server. (floating point value) +#vmware_task_poll_interval = 0.5 + +# Name of the vCenter inventory folder that will contain Cinder volumes. This +# folder will be created under "OpenStack/<project_folder>", where +# project_folder is of format "Project (<volume_project_id>)". (string value) +#vmware_volume_folder = Volumes + +# Timeout in seconds for VMDK volume transfer between Cinder and Glance. +# (integer value) +#vmware_image_transfer_timeout_secs = 7200 + +# Max number of objects to be retrieved per batch. Query results will be +# obtained in batches from the server and not in one shot. Server may still +# limit the count to something less than the configured value. (integer value) +#vmware_max_objects_retrieval = 100 + +# Optional string specifying the VMware vCenter server version. The driver +# attempts to retrieve the version from VMware vCenter server. Set this +# configuration only if you want to override the vCenter server version. +# (string value) +#vmware_host_version = <None> + +# Directory where virtual disks are stored during volume backup and restore. +# (string value) +#vmware_tmp_dir = /tmp + +# CA bundle file to use in verifying the vCenter server certificate. (string +# value) +#vmware_ca_file = <None> + +# If true, the vCenter server certificate is not verified. If false, then the +# default CA truststore is used for verification. This option is ignored if +# "vmware_ca_file" is set. (boolean value) +#vmware_insecure = false + +# Name of a vCenter compute cluster where volumes should be created. (multi +# valued) +#vmware_cluster_name = + +# Pool or Vdisk name to use for volume creation. 
(string value) +#lenovo_backend_name = A + +# linear (for VDisk) or virtual (for Pool). (string value) +# Allowed values: linear, virtual +#lenovo_backend_type = virtual + +# Lenovo api interface protocol. (string value) +# Allowed values: http, https +#lenovo_api_protocol = https + +# Whether to verify Lenovo array SSL certificate. (boolean value) +#lenovo_verify_certificate = false + +# Lenovo array SSL certificate path. (string value) +#lenovo_verify_certificate_path = <None> + +# List of comma-separated target iSCSI IP addresses. (list value) +#lenovo_iscsi_ips = + +# The maximum size in bytes of the files used to hold backups. If the volume +# being backed up exceeds this size, then it will be backed up into multiple +# files.backup_file_size must be a multiple of backup_sha_block_size_bytes. +# (integer value) +#backup_file_size = 1999994880 + +# The size in bytes that changes are tracked for incremental backups. +# backup_file_size has to be multiple of backup_sha_block_size_bytes. (integer +# value) +#backup_sha_block_size_bytes = 32768 + +# Enable or Disable the timer to send the periodic progress notifications to +# Ceilometer when backing up the volume to the backend storage. The default +# value is True to enable the timer. (boolean value) +#backup_enable_progress_timer = true + +# Path specifying where to store backups. (string value) +#backup_posix_path = $state_path/backup + +# Custom directory to use for backups. (string value) +#backup_container = <None> + +# REST server port. (string value) +#sio_rest_server_port = 443 + +# Whether to verify server certificate. (boolean value) +#sio_verify_server_certificate = false + +# Server certificate path. (string value) +#sio_server_certificate_path = <None> + +# Whether to round volume capacity. (boolean value) +#sio_round_volume_capacity = true + +# Whether to allow force delete. (boolean value) +#sio_force_delete = false + +# Whether to unmap volume before deletion. (boolean value) +#sio_unmap_volume_before_deletion = false + +# Protection domain id. (string value) +#sio_protection_domain_id = <None> + +# Protection domain name. (string value) +#sio_protection_domain_name = <None> + +# Storage pools. (string value) +#sio_storage_pools = <None> + +# Storage pool name. (string value) +#sio_storage_pool_name = <None> + +# Storage pool id. (string value) +#sio_storage_pool_id = <None> + +# Group name to use for creating volumes. Defaults to "group-0". (string value) +#eqlx_group_name = group-0 + +# Timeout for the Group Manager cli command execution. Default is 30. Note that +# this option is deprecated in favour of "ssh_conn_timeout" as specified in +# cinder/volume/drivers/san/san.py and will be removed in M release. (integer +# value) +#eqlx_cli_timeout = 30 + +# Maximum retry count for reconnection. Default is 5. (integer value) +#eqlx_cli_max_retries = 5 + +# Use CHAP authentication for targets. Note that this option is deprecated in +# favour of "use_chap_auth" as specified in cinder/volume/driver.py and will be +# removed in next release. (boolean value) +#eqlx_use_chap = false + +# Existing CHAP account name. Note that this option is deprecated in favour of +# "chap_username" as specified in cinder/volume/driver.py and will be removed +# in next release. (string value) +#eqlx_chap_login = admin + +# Password for specified CHAP account name. 
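# Quick arithmetic check of the constraint stated above: the default
# backup_file_size of 1999994880 bytes is an exact multiple of
# backup_sha_block_size_bytes (1999994880 = 61035 * 32768), so the pair of
# defaults is self-consistent.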
Note that this option is deprecated +# in favour of "chap_password" as specified in cinder/volume/driver.py and will +# be removed in the next release (string value) +#eqlx_chap_password = password + +# Pool in which volumes will be created. Defaults to "default". (string value) +#eqlx_pool = default + +# The number of characters in the salt. (integer value) +#volume_transfer_salt_length = 8 + +# The number of characters in the autogenerated auth key. (integer value) +#volume_transfer_key_length = 16 + +# Services to be added to the available pool on create (boolean value) +#enable_new_services = true + +# Template string to be used to generate volume names (string value) +#volume_name_template = volume-%s + +# Template string to be used to generate snapshot names (string value) +#snapshot_name_template = snapshot-%s + +# Template string to be used to generate backup names (string value) +#backup_name_template = backup-%s + +# Multiplier used for weighing volume number. Negative numbers mean to spread +# vs stack. (floating point value) +#volume_number_multiplier = -1.0 + +# Default storage pool for volumes. (integer value) +#ise_storage_pool = 1 + +# Raid level for ISE volumes. (integer value) +#ise_raid = 1 + +# Number of retries (per port) when establishing connection to ISE management +# port. (integer value) +#ise_connection_retries = 5 + +# Interval (secs) between retries. (integer value) +#ise_retry_interval = 1 + +# Number on retries to get completion status after issuing a command to ISE. +# (integer value) +#ise_completion_retries = 30 + +# Storage pool name. (string value) +#zfssa_pool = <None> + +# Project name. (string value) +#zfssa_project = <None> + +# Block size. (string value) +# Allowed values: 512, 1k, 2k, 4k, 8k, 16k, 32k, 64k, 128k +#zfssa_lun_volblocksize = 8k + +# Flag to enable sparse (thin-provisioned): True, False. (boolean value) +#zfssa_lun_sparse = false + +# Data compression. (string value) +# Allowed values: off, lzjb, gzip-2, gzip, gzip-9 +#zfssa_lun_compression = off + +# Synchronous write bias. (string value) +# Allowed values: latency, throughput +#zfssa_lun_logbias = latency + +# iSCSI initiator group. (string value) +#zfssa_initiator_group = + +# iSCSI initiator IQNs. (comma separated) (string value) +#zfssa_initiator = + +# iSCSI initiator CHAP user (name). (string value) +#zfssa_initiator_user = + +# Secret of the iSCSI initiator CHAP user. (string value) +#zfssa_initiator_password = + +# iSCSI initiators configuration. (string value) +#zfssa_initiator_config = + +# iSCSI target group name. (string value) +#zfssa_target_group = tgt-grp + +# iSCSI target CHAP user (name). (string value) +#zfssa_target_user = + +# Secret of the iSCSI target CHAP user. (string value) +#zfssa_target_password = + +# iSCSI target portal (Data-IP:Port, w.x.y.z:3260). (string value) +#zfssa_target_portal = <None> + +# Network interfaces of iSCSI targets. (comma separated) (string value) +#zfssa_target_interfaces = <None> + +# REST connection timeout. (seconds) (integer value) +#zfssa_rest_timeout = <None> + +# IP address used for replication data. (maybe the same as data ip) (string +# value) +#zfssa_replication_ip = + +# Flag to enable local caching: True, False. (boolean value) +#zfssa_enable_local_cache = true + +# Name of ZFSSA project where cache volumes are stored. (string value) +#zfssa_cache_project = os-cinder-cache + +# Sets the value of TCP_KEEPALIVE (True/False) for each server socket. 
(boolean +# value) +#tcp_keepalive = true + +# Sets the value of TCP_KEEPIDLE in seconds for each server socket. Not +# supported on OS X. (integer value) +#tcp_keepidle = 600 + +# Sets the value of TCP_KEEPINTVL in seconds for each server socket. Not +# supported on OS X. (integer value) +#tcp_keepalive_interval = <None> + +# Sets the value of TCP_KEEPCNT for each server socket. Not supported on OS X. +# (integer value) +#tcp_keepalive_count = <None> + +# CA certificate file to use to verify connecting clients (string value) +#ssl_ca_file = <None> + +# Certificate file to use when starting the server securely (string value) +#ssl_cert_file = <None> + +# Private key file to use when starting the server securely (string value) +#ssl_key_file = <None> + +# Maximum line size of message headers to be accepted. max_header_line may need +# to be increased when using large tokens (typically those generated by the +# Keystone v3 API with big service catalogs). (integer value) +#max_header_line = 16384 + +# Timeout for client connections' socket operations. If an incoming connection +# is idle for this number of seconds it will be closed. A value of '0' means +# wait forever. (integer value) +#client_socket_timeout = 900 + +# If False, closes the client socket connection explicitly. Setting it to True +# to maintain backward compatibility. Recommended setting is set it to False. +# (boolean value) +#wsgi_keep_alive = true + +# Number of times to attempt to run flakey shell commands (integer value) +#num_shell_tries = 3 + +# The percentage of backend capacity is reserved (integer value) +# Maximum value: 100 +#reserved_percentage = 0 + +# Prefix for iSCSI volumes (string value) +#iscsi_target_prefix = iqn.2010-10.org.openstack: + +# The IP address that the iSCSI daemon is listening on (string value) +#iscsi_ip_address = $my_ip + +# The list of secondary IP addresses of the iSCSI daemon (list value) +#iscsi_secondary_ip_addresses = + +# The port that the iSCSI daemon is listening on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#iscsi_port = 3260 + +# The maximum number of times to rescan targets to find volume (integer value) +#num_volume_device_scan_tries = 3 + +# The backend name for a given driver implementation (string value) +#volume_backend_name = <None> + +# Do we attach/detach volumes in cinder using multipath for volume to image and +# image to volume transfers? (boolean value) +#use_multipath_for_image_xfer = false + +# If this is set to True, attachment of volumes for image transfer will be +# aborted when multipathd is not running. Otherwise, it will fallback to single +# path. (boolean value) +#enforce_multipath_for_image_xfer = false + +# Method used to wipe old volumes (string value) +# Allowed values: none, zero, shred +#volume_clear = zero + +# Size in MiB to wipe at start of old volumes. 0 => all (integer value) +#volume_clear_size = 0 + +# The flag to pass to ionice to alter the i/o priority of the process used to +# zero a volume after deletion, for example "-c3" for idle only priority. +# (string value) +#volume_clear_ionice = <None> + +# iSCSI target user-land tool to use. tgtadm is default, use lioadm for LIO +# iSCSI support, scstadmin for SCST target support, iseradm for the ISER +# protocol, ietadm for iSCSI Enterprise Target, iscsictl for Chelsio iSCSI +# Target or fake for testing. 
(string value) +# Allowed values: tgtadm, lioadm, scstadmin, iseradm, iscsictl, ietadm, fake +#iscsi_helper = tgtadm + +# Volume configuration file storage directory (string value) +#volumes_dir = $state_path/volumes + +# IET configuration file (string value) +#iet_conf = /etc/iet/ietd.conf + +# Chiscsi (CXT) global defaults configuration file (string value) +#chiscsi_conf = /etc/chelsio-iscsi/chiscsi.conf + +# Sets the behavior of the iSCSI target to either perform blockio or fileio +# optionally, auto can be set and Cinder will autodetect type of backing device +# (string value) +# Allowed values: blockio, fileio, auto +#iscsi_iotype = fileio + +# The default block size used when copying/clearing volumes (string value) +#volume_dd_blocksize = 1M + +# The blkio cgroup name to be used to limit bandwidth of volume copy (string +# value) +#volume_copy_blkio_cgroup_name = cinder-volume-copy + +# The upper limit of bandwidth of volume copy. 0 => unlimited (integer value) +#volume_copy_bps_limit = 0 + +# Sets the behavior of the iSCSI target to either perform write-back(on) or +# write-through(off). This parameter is valid if iscsi_helper is set to tgtadm +# or iseradm. (string value) +# Allowed values: on, off +#iscsi_write_cache = on + +# Sets the target-specific flags for the iSCSI target. Only used for tgtadm to +# specify backing device flags using bsoflags option. The specified string is +# passed as is to the underlying tool. (string value) +#iscsi_target_flags = + +# Determines the iSCSI protocol for new iSCSI volumes, created with tgtadm or +# lioadm target helpers. In order to enable RDMA, this parameter should be set +# with the value "iser". The supported iSCSI protocol values are "iscsi" and +# "iser". (string value) +# Allowed values: iscsi, iser +#iscsi_protocol = iscsi + +# The path to the client certificate key for verification, if the driver +# supports it. (string value) +#driver_client_cert_key = <None> + +# The path to the client certificate for verification, if the driver supports +# it. (string value) +#driver_client_cert = <None> + +# Tell driver to use SSL for connection to backend storage if the driver +# supports it. (boolean value) +#driver_use_ssl = false + +# Float representation of the over subscription ratio when thin provisioning is +# involved. Default ratio is 20.0, meaning provisioned capacity can be 20 times +# of the total physical capacity. If the ratio is 10.5, it means provisioned +# capacity can be 10.5 times of the total physical capacity. A ratio of 1.0 +# means provisioned capacity cannot exceed the total physical capacity. A ratio +# lower than 1.0 will be ignored and the default value will be used instead. +# (floating point value) +#max_over_subscription_ratio = 20.0 + +# Certain ISCSI targets have predefined target names, SCST target driver uses +# this name. (string value) +#scst_target_iqn_name = <None> + +# SCST target implementation can choose from multiple SCST target drivers. +# (string value) +#scst_target_driver = iscsi + +# Option to enable/disable CHAP authentication for targets. (boolean value) +# Deprecated group/name - [DEFAULT]/eqlx_use_chap +#use_chap_auth = false + +# CHAP user name. (string value) +# Deprecated group/name - [DEFAULT]/eqlx_chap_login +#chap_username = + +# Password for specified CHAP account name. (string value) +# Deprecated group/name - [DEFAULT]/eqlx_chap_password +#chap_password = + +# Namespace for driver private data values to be saved in. 
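# Worked example for the thin-provisioning ratio described above: with
# max_over_subscription_ratio = 20.0 and 500 GB of physical capacity on a
# backend, up to 20.0 * 500 = 10000 GB may be provisioned; with a ratio of
# 1.0, provisioned capacity may not exceed the 500 GB of physical capacity.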
(string value) +#driver_data_namespace = <None> + +# String representation for an equation that will be used to filter hosts. Only +# used when the driver filter is set to be used by the Cinder scheduler. +# (string value) +#filter_function = <None> + +# String representation for an equation that will be used to determine the +# goodness of a host. Only used when using the goodness weigher is set to be +# used by the Cinder scheduler. (string value) +#goodness_function = <None> + +# If set to True the http client will validate the SSL certificate of the +# backend endpoint. (boolean value) +#driver_ssl_cert_verify = false + +# List of options that control which trace info is written to the DEBUG log +# level to assist developers. Valid values are method and api. (list value) +#trace_flags = <None> + +# There are two types of target configurations managed (replicate to another +# configured backend) or unmanaged (replicate to a device not managed by +# Cinder). (boolean value) +#managed_replication_target = true + +# List of k/v pairs representing a replication target for this backend device. +# For unmanaged the format is: {'key-1'='val1' 'key-2'='val2'...},{...} and for +# managed devices its simply a list of valid configured backend_names that the +# driver supports replicating to: backend-a,bakcend-b... (list value) +#replication_devices = <None> + +# If set to True, upload-to-image in raw format will create a cloned volume and +# register its location to the image service, instead of uploading the volume +# content. The cinder backend and locations support must be enabled in the +# image service, and glance_api_version must be set to 2. (boolean value) +#image_upload_use_cinder_backend = false + +# If set to True, the image volume created by upload-to-image will be placed in +# the internal tenant. Otherwise, the image volume is created in the current +# context's tenant. (boolean value) +#image_upload_use_internal_tenant = false + +# Enable the image volume cache for this backend. (boolean value) +#image_volume_cache_enabled = false + +# Max size of the image volume cache for this backend in GB. 0 => unlimited. +# (integer value) +#image_volume_cache_max_size_gb = 0 + +# Max number of entries allowed in the image volume cache. 0 => unlimited. +# (integer value) +#image_volume_cache_max_count = 0 + +# The maximum number of times to rescan iSER targetto find volume (integer +# value) +#num_iser_scan_tries = 3 + +# Prefix for iSER volumes (string value) +#iser_target_prefix = iqn.2010-10.org.openstack: + +# The IP address that the iSER daemon is listening on (string value) +#iser_ip_address = $my_ip + +# The port that the iSER daemon is listening on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#iser_port = 3260 + +# The name of the iSER target user-land tool to use (string value) +#iser_helper = tgtadm + +# Public url to use for versions endpoint. The default is None, which will use +# the request's host_url attribute to populate the URL base. If Cinder is +# operating behind a proxy, you will want to change this to represent the +# proxy's URL. (string value) +#public_endpoint = <None> + +# Nimble Controller pool name (string value) +#nimble_pool_name = default + +# Nimble Subnet Label (string value) +#nimble_subnet_label = * + +# Path to store VHD backed volumes (string value) +#windows_iscsi_lun_path = C:\iSCSIVirtualDisks + +# Pool or Vdisk name to use for volume creation. (string value) +#hpmsa_backend_name = A + +# linear (for Vdisk) or virtual (for Pool). 
(string value) +# Allowed values: linear, virtual +#hpmsa_backend_type = virtual + +# HPMSA API interface protocol. (string value) +# Allowed values: http, https +#hpmsa_api_protocol = https + +# Whether to verify HPMSA array SSL certificate. (boolean value) +#hpmsa_verify_certificate = false + +# HPMSA array SSL certificate path. (string value) +#hpmsa_verify_certificate_path = <None> + +# List of comma-separated target iSCSI IP addresses. (list value) +#hpmsa_iscsi_ips = + +# A list of url schemes that can be downloaded directly via the direct_url. +# Currently supported schemes: [file]. (list value) +#allowed_direct_url_schemes = + +# Default core properties of image (list value) +#glance_core_properties = checksum,container_format,disk_format,image_name,image_id,min_disk,min_ram,name,size + +# Name for the VG that will contain exported volumes (string value) +#volume_group = cinder-volumes + +# If >0, create LVs with multiple mirrors. Note that this requires lvm_mirrors +# + 2 PVs with available space (integer value) +#lvm_mirrors = 0 + +# Type of LVM volumes to deploy; (default, thin, or auto). Auto defaults to +# thin if thin is supported. (string value) +# Allowed values: default, thin, auto +#lvm_type = default + +# LVM conf file to use for the LVM driver in Cinder; this setting is ignored if +# the specified file does not exist (You can also specify 'None' to not use a +# conf file even if one exists). (string value) +#lvm_conf_file = /etc/cinder/lvm.conf + +# use this file for cinder emc plugin config data (string value) +#cinder_emc_config_file = /etc/cinder/cinder_emc_config.xml + +# IP address or Hostname of NAS system. (string value) +#nas_ip = + +# User name to connect to NAS system. (string value) +#nas_login = admin + +# Password to connect to NAS system. (string value) +#nas_password = + +# SSH port to use to connect to NAS system. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#nas_ssh_port = 22 + +# Filename of private key to use for SSH authentication. (string value) +#nas_private_key = + +# Allow network-attached storage systems to operate in a secure environment +# where root level access is not permitted. If set to False, access is as the +# root user and insecure. If set to True, access is not as root. If set to +# auto, a check is done to determine if this is a new installation: True is +# used if so, otherwise False. Default is auto. (string value) +#nas_secure_file_operations = auto + +# Set more secure file permissions on network-attached storage volume files to +# restrict broad other/world access. If set to False, volumes are created with +# open permissions. If set to True, volumes are created with permissions for +# the cinder user and group (660). If set to auto, a check is done to determine +# if this is a new installation: True is used if so, otherwise False. Default +# is auto. (string value) +#nas_secure_file_permissions = auto + +# Path to the share to use for storing Cinder volumes. For example: +# "/srv/export1" for an NFS server export available at 10.0.5.10:/srv/export1 . +# (string value) +#nas_share_path = + +# Options used to mount the storage backend file system where Cinder volumes +# are stored. (string value) +#nas_mount_options = <None> + +# Provisioning type that will be used when creating volumes. 
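# The LVM options above expect the volume group named by volume_group to exist
# before the volume service starts. A minimal sketch, assuming a spare block
# device /dev/sdb (purely a placeholder):
#
#   pvcreate /dev/sdb
#   vgcreate cinder-volumes /dev/sdb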
(string value) +# Allowed values: thin, thick +# Deprecated group/name - [DEFAULT]/glusterfs_sparsed_volumes +# Deprecated group/name - [DEFAULT]/glusterfs_qcow2_volumes +#nas_volume_prov_type = thin + +# IP address or hostname of mg-a (string value) +#gateway_mga = <None> + +# IP address or hostname of mg-b (string value) +#gateway_mgb = <None> + +# Use igroups to manage targets and initiators (boolean value) +#use_igroups = false + +# Global backend request timeout, in seconds (integer value) +#request_timeout = 300 + +# Comma-separated list of REST servers IP to connect to. (eg +# http://IP1/,http://IP2:81/path (string value) +#srb_base_urls = <None> + +# XMS cluster id in multi-cluster environment (string value) +#xtremio_cluster_name = + +# Number of retries in case array is busy (integer value) +#xtremio_array_busy_retry_count = 5 + +# Interval between retries in case array is busy (integer value) +#xtremio_array_busy_retry_interval = 5 + +# Serial number of storage system (string value) +#hitachi_serial_number = <None> + +# Name of an array unit (string value) +#hitachi_unit_name = <None> + +# Pool ID of storage system (integer value) +#hitachi_pool_id = <None> + +# Thin pool ID of storage system (integer value) +#hitachi_thin_pool_id = <None> + +# Range of logical device of storage system (string value) +#hitachi_ldev_range = <None> + +# Default copy method of storage system (string value) +#hitachi_default_copy_method = FULL + +# Copy speed of storage system (integer value) +#hitachi_copy_speed = 3 + +# Interval to check copy (integer value) +#hitachi_copy_check_interval = 3 + +# Interval to check copy asynchronously (integer value) +#hitachi_async_copy_check_interval = 10 + +# Control port names for HostGroup or iSCSI Target (string value) +#hitachi_target_ports = <None> + +# Range of group number (string value) +#hitachi_group_range = <None> + +# Request for creating HostGroup or iSCSI Target (boolean value) +#hitachi_group_request = false + +# Infortrend raid pool name list. It is separated with comma. (string value) +#infortrend_pools_name = + +# The Infortrend CLI absolute path. By default, it is at +# /opt/bin/Infortrend/raidcmd_ESDS10.jar (string value) +#infortrend_cli_path = /opt/bin/Infortrend/raidcmd_ESDS10.jar + +# Maximum retry time for cli. Default is 5. (integer value) +#infortrend_cli_max_retries = 5 + +# Default timeout for CLI copy operations in minutes. Support: migrate volume, +# create cloned volume and create volume from snapshot. By Default, it is 30 +# minutes. (integer value) +#infortrend_cli_timeout = 30 + +# Infortrend raid channel ID list on Slot A for OpenStack usage. It is +# separated with comma. By default, it is the channel 0~7. (string value) +#infortrend_slots_a_channels_id = 0,1,2,3,4,5,6,7 + +# Infortrend raid channel ID list on Slot B for OpenStack usage. It is +# separated with comma. By default, it is the channel 0~7. (string value) +#infortrend_slots_b_channels_id = 0,1,2,3,4,5,6,7 + +# Let the volume use specific provisioning. By default, it is the full +# provisioning. The supported options are full or thin. (string value) +#infortrend_provisioning = full + +# Let the volume use specific tiering level. By default, it is the level 0. The +# supported levels are 0,2,3,4. 
(string value) +#infortrend_tiering = 0 + +# Configuration file for HDS iSCSI cinder plugin (string value) +#hds_hnas_iscsi_config_file = /opt/hds/hnas/cinder_iscsi_conf.xml + +# The name of ceph cluster (string value) +#rbd_cluster_name = ceph + +# The RADOS pool where rbd volumes are stored (string value) +#rbd_pool = rbd + +# The RADOS client name for accessing rbd volumes - only set when using cephx +# authentication (string value) +#rbd_user = <None> + +# Path to the ceph configuration file (string value) +#rbd_ceph_conf = + +# Flatten volumes created from snapshots to remove dependency from volume to +# snapshot (boolean value) +#rbd_flatten_volume_from_snapshot = false + +# The libvirt uuid of the secret for the rbd_user volumes (string value) +#rbd_secret_uuid = <None> + +# Directory where temporary image files are stored when the volume driver does +# not write them directly to the volume. Warning: this option is now +# deprecated, please use image_conversion_dir instead. (string value) +#volume_tmp_dir = <None> + +# Maximum number of nested volume clones that are taken before a flatten +# occurs. Set to 0 to disable cloning. (integer value) +#rbd_max_clone_depth = 5 + +# Volumes will be chunked into objects of this size (in megabytes). (integer +# value) +#rbd_store_chunk_size = 4 + +# Timeout value (in seconds) used when connecting to ceph cluster. If value < +# 0, no timeout is set and default librados value is used. (integer value) +#rados_connect_timeout = -1 + +# Number of retries if connection to ceph cluster failed. (integer value) +#rados_connection_retries = 3 + +# Interval value (in seconds) between connection retries to ceph cluster. +# (integer value) +#rados_connection_interval = 5 + +# The hostname (or IP address) for the storage system (string value) +#tintri_server_hostname = <None> + +# User name for the storage system (string value) +#tintri_server_username = <None> + +# Password for the storage system (string value) +#tintri_server_password = <None> + +# API version for the storage system (string value) +#tintri_api_version = v310 + +# Instance numbers for HORCM (string value) +#hitachi_horcm_numbers = 200,201 + +# Username of storage system for HORCM (string value) +#hitachi_horcm_user = <None> + +# Password of storage system for HORCM (string value) +#hitachi_horcm_password = <None> + +# Add to HORCM configuration (boolean value) +#hitachi_horcm_add_conf = true + +# Timeout until a resource lock is released, in seconds. The value must be +# between 0 and 7200. (integer value) +#hitachi_horcm_resource_lock_timeout = 600 + +# HP LeftHand WSAPI Server Url like https://<LeftHand ip>:8081/lhos (string +# value) +#hplefthand_api_url = <None> + +# HP LeftHand Super user username (string value) +#hplefthand_username = <None> + +# HP LeftHand Super user password (string value) +#hplefthand_password = <None> + +# HP LeftHand cluster name (string value) +#hplefthand_clustername = <None> + +# Configure CHAP authentication for iSCSI connections (Default: Disabled) +# (boolean value) +#hplefthand_iscsi_chap_enabled = false + +# Enable HTTP debugging to LeftHand (boolean value) +#hplefthand_debug = false + +# Administrative user account name used to access the storage system or proxy +# server. (string value) +#netapp_login = <None> + +# Password for the administrative user account specified in the netapp_login +# option. (string value) +#netapp_password = <None> + +# The hostname (or IP address) for the storage system or proxy server. 
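# A minimal sketch of how rbd_user and rbd_secret_uuid above are typically
# wired up; the 'cinder' user, the pool names in the capability string and the
# secret.xml file are assumptions following the usual Ceph/libvirt integration
# steps, not values taken from this file:
#
#   ceph auth get-or-create client.cinder \
#       mon 'allow r' \
#       osd 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'
#   ceph auth get-key client.cinder > client.cinder.key
#   virsh secret-define --file secret.xml      # secret.xml carries the uuid used as rbd_secret_uuid
#   virsh secret-set-value --secret <uuid> --base64 $(cat client.cinder.key)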
(string +# value) +#netapp_server_hostname = <None> + +# The TCP port to use for communication with the storage system or proxy +# server. If not specified, Data ONTAP drivers will use 80 for HTTP and 443 for +# HTTPS; E-Series will use 8080 for HTTP and 8443 for HTTPS. (integer value) +#netapp_server_port = <None> + +# This option is used to specify the path to the E-Series proxy application on +# a proxy server. The value is combined with the value of the +# netapp_transport_type, netapp_server_hostname, and netapp_server_port options +# to create the URL used by the driver to connect to the proxy application. +# (string value) +#netapp_webservice_path = /devmgr/v2 + +# This option is only utilized when the storage family is configured to +# eseries. This option is used to restrict provisioning to the specified +# controllers. Specify the value of this option to be a comma separated list of +# controller hostnames or IP addresses to be used for provisioning. (string +# value) +#netapp_controller_ips = <None> + +# Password for the NetApp E-Series storage array. (string value) +#netapp_sa_password = <None> + +# This option specifies whether the driver should allow operations that require +# multiple attachments to a volume. An example would be live migration of +# servers that have volumes attached. When enabled, this backend is limited to +# 256 total volumes in order to guarantee volumes can be accessed by more than +# one host. (boolean value) +#netapp_enable_multiattach = false + +# The transport protocol used when communicating with the storage system or +# proxy server. (string value) +# Allowed values: http, https +#netapp_transport_type = http + +# This option defines the type of operating system that will access a LUN +# exported from Data ONTAP; it is assigned to the LUN at the time it is +# created. (string value) +#netapp_lun_ostype = <None> + +# This option defines the type of operating system for all initiators that can +# access a LUN. This information is used when mapping LUNs to individual hosts +# or groups of hosts. (string value) +# Deprecated group/name - [DEFAULT]/netapp_eseries_host_type +#netapp_host_type = <None> + +# This option is used to restrict provisioning to the specified pools. Specify +# the value of this option to be a regular expression which will be applied to +# the names of objects from the storage backend which represent pools in +# Cinder. This option is only utilized when the storage protocol is configured +# to use iSCSI or FC. 
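+# For example, a value such as ^cinder_.*$ would limit provisioning to pools
+# whose names begin with "cinder_" (the prefix here is purely illustrative;
+# this template keeps the permissive default shown below).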
(string value) +# Deprecated group/name - [DEFAULT]/netapp_volume_list +# Deprecated group/name - [DEFAULT]/netapp_storage_pools +#netapp_pool_name_search_pattern = (.+) + +# Request for FC Zone creating HostGroup (boolean value) +#hitachi_zoning_request = false + +# Number of volumes allowed per project (integer value) +#quota_volumes = 10 + +# Number of volume snapshots allowed per project (integer value) +#quota_snapshots = 10 + +# Number of consistencygroups allowed per project (integer value) +#quota_consistencygroups = 10 + +# Total amount of storage, in gigabytes, allowed for volumes and snapshots per +# project (integer value) +#quota_gigabytes = 1000 + +# Number of volume backups allowed per project (integer value) +#quota_backups = 10 + +# Total amount of storage, in gigabytes, allowed for backups per project +# (integer value) +#quota_backup_gigabytes = 1000 + +# Number of seconds until a reservation expires (integer value) +#reservation_expire = 86400 + +# Count of reservations until usage is refreshed (integer value) +#until_refresh = 0 + +# Number of seconds between subsequent usage refreshes (integer value) +#max_age = 0 + +# Default driver to use for quota checks (string value) +#quota_driver = cinder.quota.DbQuotaDriver + +# Enables or disables use of default quota class with default quota. (boolean +# value) +#use_default_quota_class = true + +# Max size allowed per volume, in gigabytes (integer value) +#per_volume_size_limit = -1 + +# The configuration file for the Cinder Huawei driver. (string value) +#cinder_huawei_conf_file = /etc/cinder/cinder_huawei_conf.xml + +# Storage Center System Serial Number (integer value) +#dell_sc_ssn = 64702 + +# Dell API port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#dell_sc_api_port = 3033 + +# Name of the server folder to use on the Storage Center (string value) +#dell_sc_server_folder = openstack + +# Name of the volume folder to use on the Storage Center (string value) +#dell_sc_volume_folder = openstack + +# Enable HTTPS SC certificate verification. (boolean value) +#dell_sc_verify_cert = false + +# Which filter class names to use for filtering hosts when not specified in the +# request. (list value) +#scheduler_default_filters = AvailabilityZoneFilter,CapacityFilter,CapabilitiesFilter + +# Which weigher class names to use for weighing hosts. (list value) +#scheduler_default_weighers = CapacityWeigher + +# Base dir containing mount point for NFS share. (string value) +#backup_mount_point_base = $state_path/backup_mount + +# NFS share in hostname:path, ipv4addr:path, or "[ipv6addr]:path" format. +# (string value) +#backup_share = <None> + +# Mount options passed to the NFS client. See NFS man page for details. (string +# value) +#backup_mount_options = <None> + +# IP address/hostname of Blockbridge API. (string value) +#blockbridge_api_host = <None> + +# Override HTTPS port to connect to Blockbridge API server. 
(integer value) +#blockbridge_api_port = <None> + +# Blockbridge API authentication scheme (token or password) (string value) +# Allowed values: token, password +#blockbridge_auth_scheme = token + +# Blockbridge API token (for auth scheme 'token') (string value) +#blockbridge_auth_token = <None> + +# Blockbridge API user (for auth scheme 'password') (string value) +#blockbridge_auth_user = <None> + +# Blockbridge API password (for auth scheme 'password') (string value) +#blockbridge_auth_password = <None> + +# Defines the set of exposed pools and their associated backend query strings +# (dict value) +#blockbridge_pools = OpenStack:+openstack + +# Default pool name if unspecified. (string value) +#blockbridge_default_pool = <None> + +# Data path IP address (string value) +#zfssa_data_ip = <None> + +# HTTPS port number (string value) +#zfssa_https_port = 443 + +# Options to be passed while mounting share over nfs (string value) +#zfssa_nfs_mount_options = + +# Storage pool name. (string value) +#zfssa_nfs_pool = + +# Project name. (string value) +#zfssa_nfs_project = NFSProject + +# Share name. (string value) +#zfssa_nfs_share = nfs_share + +# Data compression. (string value) +# Allowed values: off, lzjb, gzip-2, gzip, gzip-9 +#zfssa_nfs_share_compression = off + +# Synchronous write bias-latency, throughput. (string value) +# Allowed values: latency, throughput +#zfssa_nfs_share_logbias = latency + +# REST connection timeout. (seconds) (integer value) +#zfssa_rest_timeout = <None> + +# Flag to enable local caching: True, False. (boolean value) +#zfssa_enable_local_cache = true + +# Name of directory inside zfssa_nfs_share where cache volumes are stored. +# (string value) +#zfssa_cache_directory = os-cinder-cache + +# Space network name to use for data transfer (string value) +#hgst_net = Net 1 (IPv4) + +# Comma separated list of Space storage servers:devices. ex: +# os1_stor:gbd0,os2_stor:gbd0 (string value) +#hgst_storage_servers = os:gbd0 + +# Should spaces be redundantly stored (1/0) (string value) +#hgst_redundancy = 0 + +# User to own created spaces (string value) +#hgst_space_user = root + +# Group to own created spaces (string value) +#hgst_space_group = disk + +# UNIX mode for created spaces (string value) +#hgst_space_mode = 0600 + +# Directory used for temporary storage during image conversion (string value) +#image_conversion_dir = $state_path/conversion + +# Match this value when searching for nova in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> +# (string value) +#nova_catalog_info = compute:Compute Service:publicURL +nova_catalog_info = compute:nova:publicURL + +# Same as nova_catalog_info, but for admin endpoint. (string value) +#nova_catalog_admin_info = compute:Compute Service:adminURL +nova_catalog_admin_info = compute:nova:adminURL + +# Override service catalog lookup with template for nova endpoint e.g. +# http://localhost:8774/v2/%(project_id)s (string value) +#nova_endpoint_template = <None> + +# Same as nova_endpoint_template, but for admin endpoint. (string value) +#nova_endpoint_admin_template = <None> + +# Region name of this node (string value) +#os_region_name = <None> + +# Location of ca certificates file to use for nova client requests. (string +# value) +#nova_ca_certificates_file = <None> + +# Allow to perform insecure SSL requests to nova (boolean value) +#nova_api_insecure = false + +# Connect with multipath (FC only).(Default is false.) 
(boolean value) +#flashsystem_multipath_enabled = false + +# DPL pool uuid in which DPL volumes are stored. (string value) +#dpl_pool = + +# DPL port number. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#dpl_port = 8357 + +# Add CHAP user (boolean value) +#hitachi_add_chap_user = false + +# iSCSI authentication method (string value) +#hitachi_auth_method = <None> + +# iSCSI authentication username (string value) +#hitachi_auth_user = HBSD-CHAP-user + +# iSCSI authentication password (string value) +#hitachi_auth_password = HBSD-CHAP-password + +# Driver to use for volume creation (string value) +#volume_driver = cinder.volume.drivers.lvm.LVMVolumeDriver + +# Timeout for creating the volume to migrate to when performing volume +# migration (seconds) (integer value) +#migration_create_volume_timeout_secs = 300 + +# Offload pending volume delete during volume service startup (boolean value) +#volume_service_inithost_offload = false + +# FC Zoning mode configured (string value) +#zoning_mode = none + +# User defined capabilities, a JSON formatted string specifying key/value +# pairs. The key/value pairs can be used by the CapabilitiesFilter to select +# between backends when requests specify volume types. For example, specifying +# a service level or the geographical location of a backend, then creating a +# volume type to allow the user to select by these different properties. +# (string value) +#extra_capabilities = {} + +# Default iSCSI Port ID of FlashSystem. (Default port is 0.) (integer value) +#flashsystem_iscsi_portid = 0 + +# Connection protocol should be FC. (Default is FC.) (string value) +#flashsystem_connection_protocol = FC + +# Allows vdisk to multi host mapping. (Default is True) (boolean value) +#flashsystem_multihostmap_enabled = true + +# 3PAR WSAPI Server Url like https://<3par ip>:8080/api/v1 (string value) +#hp3par_api_url = + +# 3PAR username with the 'edit' role (string value) +#hp3par_username = + +# 3PAR password for the user specified in hp3par_username (string value) +#hp3par_password = + +# List of the CPG(s) to use for volume creation (list value) +#hp3par_cpg = OpenStack + +# The CPG to use for Snapshots for volumes. If empty the userCPG will be used. +# (string value) +#hp3par_cpg_snap = + +# The time in hours to retain a snapshot. You can't delete it before this +# expires. (string value) +#hp3par_snapshot_retention = + +# The time in hours when a snapshot expires and is deleted. This must be +# larger than expiration (string value) +#hp3par_snapshot_expiration = + +# Enable HTTP debugging to 3PAR (boolean value) +#hp3par_debug = false + +# List of target iSCSI addresses to use. (list value) +#hp3par_iscsi_ips = + +# Enable CHAP authentication for iSCSI connections. (boolean value) +#hp3par_iscsi_chap_enabled = false + +# Proxy driver that connects to the IBM Storage Array (string value) +#xiv_ds8k_proxy = xiv_ds8k_openstack.nova_proxy.XIVDS8KNovaProxy + +# Connection type to the IBM Storage Array (string value) +# Allowed values: fibre_channel, iscsi +#xiv_ds8k_connection_type = iscsi + +# CHAP authentication mode, effective only for iscsi (disabled|enabled) (string +# value) +# Allowed values: disabled, enabled +#xiv_chap = disabled + +# List of Management IP addresses (separated by commas) (string value) +#management_ips = + +# DEPRECATED: This will be removed in the Liberty release. Use san_login and +# san_password instead. This directly sets the Datera API token. (string value) +#datera_api_token = <None> + +# Datera API port. 
(string value) +#datera_api_port = 7717 + +# Datera API version. (string value) +#datera_api_version = 1 + +# Number of replicas to create of an inode. (string value) +#datera_num_replicas = 3 + +# List of all available devices (list value) +#available_devices = + +# URL to the Quobyte volume e.g., quobyte://<DIR host>/<volume name> (string +# value) +#quobyte_volume_url = <None> + +# Path to a Quobyte Client configuration file. (string value) +#quobyte_client_cfg = <None> + +# Create volumes as sparse files which take no space. If set to False, volume +# is created as regular file.In such case volume creation takes a lot of time. +# (boolean value) +#quobyte_sparsed_volumes = true + +# Create volumes as QCOW2 files rather than raw files. (boolean value) +#quobyte_qcow2_volumes = true + +# Base dir containing the mount point for the Quobyte volume. (string value) +#quobyte_mount_point_base = $state_path/mnt + +# File with the list of available vzstorage shares. (string value) +#vzstorage_shares_config = /etc/cinder/vzstorage_shares + +# Create volumes as sparsed files which take no space rather than regular files +# when using raw format, in which case volume creation takes lot of time. +# (boolean value) +#vzstorage_sparsed_volumes = true + +# Percent of ACTUAL usage of the underlying volume before no new volumes can be +# allocated to the volume destination. (floating point value) +#vzstorage_used_ratio = 0.95 + +# Base dir containing mount points for vzstorage shares. (string value) +#vzstorage_mount_point_base = $state_path/mnt + +# Mount options passed to the vzstorage client. See section of the pstorage- +# mount man page for details. (list value) +#vzstorage_mount_options = <None> + +# File with the list of available nfs shares (string value) +#nfs_shares_config = /etc/cinder/nfs_shares + +# Create volumes as sparsed files which take no space.If set to False volume is +# created as regular file.In such case volume creation takes a lot of time. +# (boolean value) +#nfs_sparsed_volumes = true + +# Percent of ACTUAL usage of the underlying volume before no new volumes can be +# allocated to the volume destination. Note that this option is deprecated in +# favor of "reserved_percentage" and will be removed in the Mitaka release. +# (floating point value) +#nfs_used_ratio = 0.95 + +# This will compare the allocated to available space on the volume destination. +# If the ratio exceeds this number, the destination will no longer be valid. +# Note that this option is deprecated in favor of "max_oversubscription_ratio" +# and will be removed in the Mitaka release. (floating point value) +#nfs_oversub_ratio = 1.0 + +# Base dir containing mount points for nfs shares. (string value) +#nfs_mount_point_base = $state_path/mnt + +# Mount options passed to the nfs client. See section of the nfs man page for +# details. (string value) +#nfs_mount_options = <None> + +# The number of attempts to mount nfs shares before raising an error. At least +# one attempt will be made to mount an nfs share, regardless of the value +# specified. (integer value) +#nfs_mount_attempts = 3 + +# +# From oslo.log +# + +# Print debugging output (set logging level to DEBUG instead of default INFO +# level). (boolean value) +#debug = false +debug = True + +# If set to false, will disable INFO logging level, making WARNING the default. +# (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#verbose = true +verbose = True + +# The name of a logging configuration file. 
This file is appended to any +# existing logging configuration files. For details about logging configuration +# files, see the Python logging module documentation. (string value) +# Deprecated group/name - [DEFAULT]/log_config +#log_config_append = <None> + +# DEPRECATED. A logging.Formatter log message format string which may use any +# of the available logging.LogRecord attributes. This option is deprecated. +# Please use logging_context_format_string and logging_default_format_string +# instead. (string value) +#log_format = <None> + +# Format string for %%(asctime)s in log records. Default: %(default)s . (string +# value) +#log_date_format = %Y-%m-%d %H:%M:%S + +# (Optional) Name of log file to output to. If no default is set, logging will +# go to stdout. (string value) +# Deprecated group/name - [DEFAULT]/logfile +#log_file = <None> + +# (Optional) The base directory used for relative --log-file paths. (string +# value) +# Deprecated group/name - [DEFAULT]/logdir +#log_dir = <None> +log_dir = /var/log/cinder + +# Use syslog for logging. Existing syslog format is DEPRECATED and will be +# changed later to honor RFC5424. (boolean value) +#use_syslog = false + +# (Optional) Enables or disables syslog rfc5424 format for logging. If enabled, +# prefixes the MSG part of the syslog message with APP-NAME (RFC5424). The +# format without the APP-NAME is deprecated in Kilo, and will be removed in +# Mitaka, along with this option. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#use_syslog_rfc_format = true + +# Syslog facility to receive log lines. (string value) +#syslog_log_facility = LOG_USER + +# Log output to standard error. (boolean value) +#use_stderr = true + +# Format string to use for log messages with context. (string value) +#logging_context_format_string = %(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [%(request_id)s %(user_identity)s] %(instance)s%(message)s + +# Format string to use for log messages without context. (string value) +#logging_default_format_string = %(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s + +# Data to append to log format when level is DEBUG. (string value) +#logging_debug_format_suffix = %(funcName)s %(pathname)s:%(lineno)d + +# Prefix each line of exception output with this format. (string value) +#logging_exception_prefix = %(asctime)s.%(msecs)03d %(process)d ERROR %(name)s %(instance)s + +# List of logger=LEVEL pairs. (list value) +#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN + +# Enables or disables publication of error events. (boolean value) +#publish_errors = false + +# The format for an instance that is passed with the log message. (string +# value) +#instance_format = "[instance: %(uuid)s] " + +# The format for an instance UUID that is passed with the log message. (string +# value) +#instance_uuid_format = "[instance: %(uuid)s] " + +# Enables or disables fatal status of deprecations. (boolean value) +#fatal_deprecations = false + +# +# From oslo.messaging +# + +# Size of RPC connection pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_conn_pool_size +#rpc_conn_pool_size = 30 + +# ZeroMQ bind address. 
Should be a wildcard (*), an ethernet interface, or IP. +# The "host" option should point or resolve to this address. (string value) +#rpc_zmq_bind_address = * + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker = local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port = 9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts = 1 + +# Maximum number of ingress messages to locally buffer per topic. Default is +# unlimited. (integer value) +#rpc_zmq_topic_backlog = <None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir = /var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. Must match +# "host" option, if running Nova. (string value) +#rpc_zmq_host = localhost + +# Seconds to wait before a cast expires (TTL). Only supported by impl_zmq. +# (integer value) +#rpc_cast_timeout = 30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq = 300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl = 600 + +# Size of executor thread pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_thread_pool_size +#executor_thread_pool_size = 64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = +notification_driver =messagingv2 + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group/name - [rpc_notifier2]/topics +#notification_topics = notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout = 60 + +# A URL representing the messaging driver to use and its full configuration. If +# not set, we fall back to the rpc_backend option and driver specific +# configuration. (string value) +#transport_url = <None> + +# The messaging driver to use, defaults to rabbit. Other drivers include qpid +# and zmq. (string value) +#rpc_backend = rabbit +rpc_backend = rabbit + +# The default exchange under which topics are scoped. May be overridden by an +# exchange name specified in the transport_url option. (string value) +#control_exchange = openstack +control_exchange = openstack + +# +# From oslo.messaging +# + +# Size of RPC connection pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_conn_pool_size +#rpc_conn_pool_size = 30 + +# ZeroMQ bind address. Should be a wildcard (*), an ethernet interface, or IP. +# The "host" option should point or resolve to this address. (string value) +#rpc_zmq_bind_address = * + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker = local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port = 9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts = 1 + +# Maximum number of ingress messages to locally buffer per topic. Default is +# unlimited. (integer value) +#rpc_zmq_topic_backlog = <None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir = /var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. Must match +# "host" option, if running Nova. (string value) +#rpc_zmq_host = localhost + +# Seconds to wait before a cast expires (TTL). Only supported by impl_zmq. +# (integer value) +#rpc_cast_timeout = 30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq = 300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl = 600 + +# Size of executor thread pool. 
(integer value) +# Deprecated group/name - [DEFAULT]/rpc_thread_pool_size +#executor_thread_pool_size = 64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group/name - [rpc_notifier2]/topics +#notification_topics = notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout = 60 + +# A URL representing the messaging driver to use and its full configuration. If +# not set, we fall back to the rpc_backend option and driver specific +# configuration. (string value) +#transport_url = <None> + +# The messaging driver to use, defaults to rabbit. Other drivers include qpid +# and zmq. (string value) +#rpc_backend = rabbit + +# The default exchange under which topics are scoped. May be overridden by an +# exchange name specified in the transport_url option. (string value) +#control_exchange = openstack +api_paste_config=/etc/cinder/api-paste.ini + + +[BRCD_FABRIC_EXAMPLE] + +# +# From cinder +# + +# Management IP of fabric (string value) +#fc_fabric_address = + +# Fabric user ID (string value) +#fc_fabric_user = + +# Password for user (string value) +#fc_fabric_password = + +# Connecting port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#fc_fabric_port = 22 + +# overridden zoning policy (string value) +#zoning_policy = initiator-target + +# overridden zoning activation state (boolean value) +#zone_activate = true + +# overridden zone name prefix (string value) +#zone_name_prefix = <None> + +# Principal switch WWN of the fabric (string value) +#principal_switch_wwn = <None> + + +[CISCO_FABRIC_EXAMPLE] + +# +# From cinder +# + +# Management IP of fabric (string value) +#cisco_fc_fabric_address = + +# Fabric user ID (string value) +#cisco_fc_fabric_user = + +# Password for user (string value) +#cisco_fc_fabric_password = + +# Connecting port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#cisco_fc_fabric_port = 22 + +# overridden zoning policy (string value) +#cisco_zoning_policy = initiator-target + +# overridden zoning activation state (boolean value) +#cisco_zone_activate = true + +# overridden zone name prefix (string value) +#cisco_zone_name_prefix = <None> + +# VSAN of the Fabric (string value) +#cisco_zoning_vsan = <None> + + +[cors] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. (string value) +#allowed_origin = <None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials = true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age = 3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods = GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[cors.subdomain] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. 
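+# In practice this is set to the origin URL of the client that should be
+# allowed, for example allowed_origin = http://horizon.example.com (the URL
+# is illustrative only; this template leaves the option unset).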
(string value) +#allowed_origin = <None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials = true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age = 3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods = GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[database] + +# +# From oslo.db +# + +# The file name to use with SQLite. (string value) +# Deprecated group/name - [DEFAULT]/sqlite_db +#sqlite_db = oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group/name - [DEFAULT]/sqlite_synchronous +#sqlite_synchronous = true + +# The back end to use for the database. (string value) +# Deprecated group/name - [DEFAULT]/db_backend +#backend = sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. (string +# value) +# Deprecated group/name - [DEFAULT]/sql_connection +# Deprecated group/name - [DATABASE]/sql_connection +# Deprecated group/name - [sql]/connection +#connection = <None> +connection = mysql+pymysql://cinder:qum5net@VARINET4ADDR/cinder + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection = <None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode = TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +# Deprecated group/name - [DEFAULT]/sql_idle_timeout +# Deprecated group/name - [DATABASE]/sql_idle_timeout +# Deprecated group/name - [sql]/idle_timeout +#idle_timeout = 3600 + +# Minimum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group/name - [DEFAULT]/sql_min_pool_size +# Deprecated group/name - [DATABASE]/sql_min_pool_size +#min_pool_size = 1 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_pool_size +# Deprecated group/name - [DATABASE]/sql_max_pool_size +#max_pool_size = <None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_retries +# Deprecated group/name - [DATABASE]/sql_max_retries +#max_retries = 10 + +# Interval between retries of opening a SQL connection. (integer value) +# Deprecated group/name - [DEFAULT]/sql_retry_interval +# Deprecated group/name - [DATABASE]/reconnect_interval +#retry_interval = 10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_overflow +# Deprecated group/name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow = <None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_connection_debug +#connection_debug = 0 + +# Add Python stack traces to SQL as comment strings. 
(boolean value) +# Deprecated group/name - [DEFAULT]/sql_connection_trace +#connection_trace = false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +# Deprecated group/name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout = <None> + +# Enable the experimental use of database reconnect on connection lost. +# (boolean value) +#use_db_reconnect = false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval = 1 + +# If True, increases the interval between retries of a database operation up to +# db_max_retry_interval. (boolean value) +#db_inc_retry_interval = true + +# If db_inc_retry_interval is set, the maximum seconds between retries of a +# database operation. (integer value) +#db_max_retry_interval = 10 + +# Maximum retries in case of connection error or deadlock error before error is +# raised. Set to -1 to specify an infinite retry count. (integer value) +#db_max_retries = 20 + + +[fc-zone-manager] + +# +# From cinder +# + +# FC Zone Driver responsible for zone management (string value) +#zone_driver = cinder.zonemanager.drivers.brocade.brcd_fc_zone_driver.BrcdFCZoneDriver + +# Zoning policy configured by user; valid values include "initiator-target" or +# "initiator" (string value) +#zoning_policy = initiator-target + +# Comma separated list of Fibre Channel fabric names. This list of names is +# used to retrieve other SAN credentials for connecting to each SAN fabric +# (string value) +#fc_fabric_names = <None> + +# FC SAN Lookup Service (string value) +#fc_san_lookup_service = cinder.zonemanager.drivers.brocade.brcd_fc_san_lookup_service.BrcdFCSanLookupService + +# Southbound connector for zoning operation (string value) +#brcd_sb_connector = cinder.zonemanager.drivers.brocade.brcd_fc_zone_client_cli.BrcdFCZoneClientCLI + +# Southbound connector for zoning operation (string value) +#cisco_sb_connector = cinder.zonemanager.drivers.cisco.cisco_fc_zone_client_cli.CiscoFCZoneClientCLI + + +[keymgr] + +# +# From cinder +# + +# Authentication url for encryption service. (string value) +#encryption_auth_url = http://localhost:5000/v3 + +# Url for encryption service. (string value) +#encryption_api_url = http://localhost:9311/v1 + +# The full class name of the key manager API class (string value) +#api_class = cinder.keymgr.conf_key_mgr.ConfKeyManager + +# Fixed key returned by key manager, specified in hex (string value) +#fixed_key = <None> + + +[keystone_authtoken] + +# +# From keystonemiddleware.auth_token +# + +# Complete public Identity API endpoint. (string value) +#auth_uri = <None> +auth_uri = http://VARINET4ADDR:5000/v2.0 + +# API version of the admin Identity API endpoint. (string value) +#auth_version = <None> + +# Do not handle authorization requests within the middleware, but delegate the +# authorization decision to downstream WSGI components. (boolean value) +#delay_auth_decision = false + +# Request timeout value for communicating with Identity API server. (integer +# value) +#http_connect_timeout = <None> + +# How many times are we trying to reconnect when communicating with Identity +# API Server. (integer value) +#http_request_max_retries = 3 + +# Env key for the swift cache. (string value) +#cache = <None> + +# Required if identity server requires client certificate (string value) +#certfile = <None> + +# Required if identity server requires client certificate (string value) +#keyfile = <None> + +# A PEM encoded Certificate Authority to use when verifying HTTPs connections. +# Defaults to system CAs. 
(string value) +#cafile = <None> + +# Verify HTTPS connections. (boolean value) +#insecure = false + +# The region in which the identity server can be found. (string value) +#region_name = <None> + +# Directory used to cache files related to PKI tokens. (string value) +#signing_dir = <None> + +# Optionally specify a list of memcached server(s) to use for caching. If left +# undefined, tokens will instead be cached in-process. (list value) +# Deprecated group/name - [DEFAULT]/memcache_servers +#memcached_servers = <None> + +# In order to prevent excessive effort spent validating tokens, the middleware +# caches previously-seen tokens for a configurable duration (in seconds). Set +# to -1 to disable caching completely. (integer value) +#token_cache_time = 300 + +# Determines the frequency at which the list of revoked tokens is retrieved +# from the Identity service (in seconds). A high number of revocation events +# combined with a low cache duration may significantly reduce performance. +# (integer value) +#revocation_cache_time = 10 + +# (Optional) If defined, indicate whether token data should be authenticated or +# authenticated and encrypted. Acceptable values are MAC or ENCRYPT. If MAC, +# token data is authenticated (with HMAC) in the cache. If ENCRYPT, token data +# is encrypted and authenticated in the cache. If the value is not one of these +# options or empty, auth_token will raise an exception on initialization. +# (string value) +#memcache_security_strategy = <None> + +# (Optional, mandatory if memcache_security_strategy is defined) This string is +# used for key derivation. (string value) +#memcache_secret_key = <None> + +# (Optional) Number of seconds memcached server is considered dead before it is +# tried again. (integer value) +#memcache_pool_dead_retry = 300 + +# (Optional) Maximum total number of open connections to every memcached +# server. (integer value) +#memcache_pool_maxsize = 10 + +# (Optional) Socket timeout in seconds for communicating with a memcached +# server. (integer value) +#memcache_pool_socket_timeout = 3 + +# (Optional) Number of seconds a connection to memcached is held unused in the +# pool before it is closed. (integer value) +#memcache_pool_unused_timeout = 60 + +# (Optional) Number of seconds that an operation will wait to get a memcached +# client connection from the pool. (integer value) +#memcache_pool_conn_get_timeout = 10 + +# (Optional) Use the advanced (eventlet safe) memcached client pool. The +# advanced pool will only work under python 2.x. (boolean value) +#memcache_use_advanced_pool = false + +# (Optional) Indicate whether to set the X-Service-Catalog header. If False, +# middleware will not ask for service catalog on token validation and will not +# set the X-Service-Catalog header. (boolean value) +#include_service_catalog = true + +# Used to control the use and type of token binding. Can be set to: "disabled" +# to not check token binding. "permissive" (default) to validate binding +# information if the bind type is of a form known to the server and ignore it +# if not. "strict" like "permissive" but if the bind type is unknown the token +# will be rejected. "required" any form of token binding is needed to be +# allowed. Finally the name of a binding method that must be present in tokens. +# (string value) +#enforce_token_bind = permissive + +# If true, the revocation list will be checked for cached tokens. This requires +# that PKI tokens are configured on the identity server. 
(boolean value) +#check_revocations_for_cached = false + +# Hash algorithms to use for hashing PKI tokens. This may be a single algorithm +# or multiple. The algorithms are those supported by Python standard +# hashlib.new(). The hashes will be tried in the order given, so put the +# preferred one first for performance. The result of the first hash will be +# stored in the cache. This will typically be set to multiple values only while +# migrating from a less secure algorithm to a more secure one. Once all the old +# tokens are expired this option should be set to a single value for better +# performance. (list value) +#hash_algorithms = md5 + +# Prefix to prepend at the beginning of the path. Deprecated, use identity_uri. +# (string value) +#auth_admin_prefix = + +# Host providing the admin Identity API endpoint. Deprecated, use identity_uri. +# (string value) +#auth_host = 127.0.0.1 + +# Port of the admin Identity API endpoint. Deprecated, use identity_uri. +# (integer value) +#auth_port = 35357 + +# Protocol of the admin Identity API endpoint (http or https). Deprecated, use +# identity_uri. (string value) +#auth_protocol = https + +# Complete admin Identity API endpoint. This should specify the unversioned +# root endpoint e.g. https://localhost:35357/ (string value) +#identity_uri = <None> +identity_uri = http://VARINET4ADDR:35357 + +# This option is deprecated and may be removed in a future release. Single +# shared secret with the Keystone configuration used for bootstrapping a +# Keystone installation, or otherwise bypassing the normal authentication +# process. This option should not be used, use `admin_user` and +# `admin_password` instead. (string value) +#admin_token = <None> + +# Service username. (string value) +#admin_user = <None> +admin_user = cinder + +# Service user password. (string value) +#admin_password = <None> +admin_password = qum5net + +# Service tenant name. (string value) +#admin_tenant_name = admin +admin_tenant_name = services + + +[matchmaker_redis] + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host = 127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port = 6379 + +# Password for Redis server (optional). (string value) +#password = <None> + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host = 127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port = 6379 + +# Password for Redis server (optional). (string value) +#password = <None> + + +[matchmaker_ring] + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group/name - [DEFAULT]/matchmaker_ringfile +#ringfile = /etc/oslo/matchmaker_ring.json + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group/name - [DEFAULT]/matchmaker_ringfile +#ringfile = /etc/oslo/matchmaker_ring.json + + +[oslo_concurrency] + +# +# From oslo.concurrency +# + +# Enables or disables inter-process locks. (boolean value) +# Deprecated group/name - [DEFAULT]/disable_process_locking +#disable_process_locking = false + +# Directory to use for lock files. For security, the specified directory +# should only be writable by the user running the processes that need locking. +# Defaults to environment variable OSLO_LOCK_PATH. If external locks are used, +# a lock path must be set. 
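+# Packaged OpenStack deployments commonly point this at a service-owned state
+# directory, for example lock_path = /var/lib/cinder/tmp (illustrative only;
+# this template leaves the option at its default).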
(string value) +# Deprecated group/name - [DEFAULT]/lock_path +#lock_path = <None> + + +[oslo_messaging_amqp] + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group/name - [amqp1]/server_request_prefix +#server_request_prefix = exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group/name - [amqp1]/broadcast_prefix +#broadcast_prefix = broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group/name - [amqp1]/group_request_prefix +#group_request_prefix = unicast + +# Name for the AMQP container (string value) +# Deprecated group/name - [amqp1]/container_name +#container_name = <None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group/name - [amqp1]/idle_timeout +#idle_timeout = 0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group/name - [amqp1]/trace +#trace = false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group/name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string value) +# Deprecated group/name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string value) +# Deprecated group/name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group/name - [amqp1]/ssl_key_password +#ssl_key_password = <None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group/name - [amqp1]/allow_insecure_clients +#allow_insecure_clients = false + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group/name - [amqp1]/server_request_prefix +#server_request_prefix = exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group/name - [amqp1]/broadcast_prefix +#broadcast_prefix = broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group/name - [amqp1]/group_request_prefix +#group_request_prefix = unicast + +# Name for the AMQP container (string value) +# Deprecated group/name - [amqp1]/container_name +#container_name = <None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group/name - [amqp1]/idle_timeout +#idle_timeout = 0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group/name - [amqp1]/trace +#trace = false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group/name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string value) +# Deprecated group/name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string value) +# Deprecated group/name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group/name - [amqp1]/ssl_key_password +#ssl_key_password = <None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group/name - [amqp1]/allow_insecure_clients +#allow_insecure_clients = false + + +[oslo_messaging_qpid] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. 
(boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# Qpid broker hostname. (string value) +# Deprecated group/name - [DEFAULT]/qpid_hostname +#qpid_hostname = localhost + +# Qpid broker port. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_port +#qpid_port = 5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/qpid_hosts +#qpid_hosts = $qpid_hostname:$qpid_port + +# Username for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string value) +# Deprecated group/name - [DEFAULT]/qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_heartbeat +#qpid_heartbeat = 60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group/name - [DEFAULT]/qpid_protocol +#qpid_protocol = tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group/name - [DEFAULT]/qpid_tcp_nodelay +#qpid_tcp_nodelay = true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_receiver_capacity +#qpid_receiver_capacity = 1 + +# The qpid topology version to use. Version 1 is what was originally used by +# impl_qpid. Version 2 includes some backwards-incompatible changes that allow +# broker federation to work. Users should update to version 2 when they are +# able to take everything down, as it requires a clean break. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_topology_version +#qpid_topology_version = 1 + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. 
Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# Qpid broker hostname. (string value) +# Deprecated group/name - [DEFAULT]/qpid_hostname +#qpid_hostname = localhost + +# Qpid broker port. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_port +#qpid_port = 5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/qpid_hosts +#qpid_hosts = $qpid_hostname:$qpid_port + +# Username for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string value) +# Deprecated group/name - [DEFAULT]/qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_heartbeat +#qpid_heartbeat = 60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group/name - [DEFAULT]/qpid_protocol +#qpid_protocol = tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group/name - [DEFAULT]/qpid_tcp_nodelay +#qpid_tcp_nodelay = true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_receiver_capacity +#qpid_receiver_capacity = 1 + +# The qpid topology version to use. Version 1 is what was originally used by +# impl_qpid. Version 2 includes some backwards-incompatible changes that allow +# broker federation to work. Users should update to version 2 when they are +# able to take everything down, as it requires a clean break. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_topology_version +#qpid_topology_version = 1 + + +[oslo_messaging_rabbit] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false +amqp_durable_queues = False + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# SSL version to use (valid only if SSL enabled). Valid values are TLSv1 and +# SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be available on some +# distributions. (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_keyfile +#kombu_ssl_keyfile = +kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_certfile +#kombu_ssl_certfile = +kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). 
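+# When SSL toward RabbitMQ is enabled, the kombu_ssl_* options in this section
+# are normally set together with rabbit_use_ssl, for example (paths are
+# illustrative only; this template connects without SSL):
+#   rabbit_use_ssl = True
+#   kombu_ssl_ca_certs = /etc/pki/tls/certs/rabbitmq-ca.crt
+#   kombu_ssl_certfile = /etc/pki/tls/certs/cinder-client.crt
+#   kombu_ssl_keyfile = /etc/pki/tls/private/cinder-client.key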
(string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_ca_certs +#kombu_ssl_ca_certs = +kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer cancel +# notification. (floating point value) +# Deprecated group/name - [DEFAULT]/kombu_reconnect_delay +#kombu_reconnect_delay = 1.0 + +# How long to wait before considering a reconnect attempt to have failed. This +# value should not be longer than rpc_response_timeout. (integer value) +#kombu_reconnect_timeout = 60 + +# The RabbitMQ broker address where a single node is used. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_host +#rabbit_host = localhost +rabbit_host = VARINET4ADDR + +# The RabbitMQ broker port where a single node is used. (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_port +#rabbit_port = 5672 +rabbit_port = 5672 + +# RabbitMQ HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/rabbit_hosts +#rabbit_hosts = $rabbit_host:$rabbit_port +rabbit_hosts = VARINET4ADDR:5672 + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_use_ssl +#rabbit_use_ssl = false +rabbit_use_ssl = False + +# The RabbitMQ userid. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_userid +#rabbit_userid = guest +rabbit_userid = guest + +# The RabbitMQ password. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_password +#rabbit_password = guest +rabbit_password = guest + +# The RabbitMQ login method. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_login_method +#rabbit_login_method = AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_virtual_host +#rabbit_virtual_host = / +rabbit_virtual_host = / + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval = 1 + +# How long to backoff for between retries when connecting to RabbitMQ. (integer +# value) +# Deprecated group/name - [DEFAULT]/rabbit_retry_backoff +#rabbit_retry_backoff = 2 + +# Maximum number of RabbitMQ connection retries. Default is 0 (infinite retry +# count). (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_max_retries +#rabbit_max_retries = 0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this option, you +# must wipe the RabbitMQ database. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_ha_queues +#rabbit_ha_queues = false +rabbit_ha_queues = False + +# Specifies the number of messages to prefetch. Setting to zero allows +# unlimited messages. (integer value) +#rabbit_qos_prefetch_count = 0 + +# Number of seconds after which the Rabbit broker is considered down if +# heartbeat's keep-alive fails (0 disable the heartbeat). EXPERIMENTAL (integer +# value) +#heartbeat_timeout_threshold = 60 +heartbeat_timeout_threshold = 0 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate = 2 +heartbeat_rate = 2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake (boolean value) +# Deprecated group/name - [DEFAULT]/fake_rabbit +#fake_rabbit = false + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false + +# Auto-delete queues in AMQP. 
(boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# SSL version to use (valid only if SSL enabled). Valid values are TLSv1 and +# SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be available on some +# distributions. (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_keyfile +#kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_certfile +#kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_ca_certs +#kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer cancel +# notification. (floating point value) +# Deprecated group/name - [DEFAULT]/kombu_reconnect_delay +#kombu_reconnect_delay = 1.0 + +# How long to wait before considering a reconnect attempt to have failed. This +# value should not be longer than rpc_response_timeout. (integer value) +#kombu_reconnect_timeout = 60 + +# The RabbitMQ broker address where a single node is used. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_host +#rabbit_host = localhost + +# The RabbitMQ broker port where a single node is used. (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_port +#rabbit_port = 5672 + +# RabbitMQ HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/rabbit_hosts +#rabbit_hosts = $rabbit_host:$rabbit_port + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_use_ssl +#rabbit_use_ssl = false + +# The RabbitMQ userid. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_userid +#rabbit_userid = guest + +# The RabbitMQ password. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_password +#rabbit_password = guest + +# The RabbitMQ login method. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_login_method +#rabbit_login_method = AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_virtual_host +#rabbit_virtual_host = / + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval = 1 + +# How long to backoff for between retries when connecting to RabbitMQ. (integer +# value) +# Deprecated group/name - [DEFAULT]/rabbit_retry_backoff +#rabbit_retry_backoff = 2 + +# Maximum number of RabbitMQ connection retries. Default is 0 (infinite retry +# count). (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_max_retries +#rabbit_max_retries = 0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this option, you +# must wipe the RabbitMQ database. 
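+# In a clustered RabbitMQ deployment this is typically enabled together with a
+# multi-node rabbit_hosts list, for example rabbit_ha_queues = true and
+# rabbit_hosts = rabbit1:5672,rabbit2:5672 (host names are illustrative; this
+# template targets a single broker and keeps HA queues disabled).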
(boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_ha_queues +#rabbit_ha_queues = false + +# Specifies the number of messages to prefetch. Setting to zero allows +# unlimited messages. (integer value) +#rabbit_qos_prefetch_count = 0 + +# Number of seconds after which the Rabbit broker is considered down if +# heartbeat's keep-alive fails (0 disable the heartbeat). EXPERIMENTAL (integer +# value) +#heartbeat_timeout_threshold = 60 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate = 2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake (boolean value) +# Deprecated group/name - [DEFAULT]/fake_rabbit +#fake_rabbit = false + + +[oslo_middleware] + +# +# From oslo.middleware +# + +# The maximum body size for each request, in bytes. (integer value) +# Deprecated group/name - [DEFAULT]/osapi_max_request_body_size +# Deprecated group/name - [DEFAULT]/max_request_body_size +#max_request_body_size = 114688 + +# +# From oslo.middleware +# + +# The HTTP Header that will be used to determine what the original request +# protocol scheme was, even if it was hidden by an SSL termination proxy. +# (string value) +#secure_proxy_ssl_header = X-Forwarded-Proto + + +[oslo_policy] + +# +# From oslo.policy +# + +# The JSON file that defines policies. (string value) +# Deprecated group/name - [DEFAULT]/policy_file +#policy_file = policy.json + +# Default rule. Enforced when a requested rule is not found. (string value) +# Deprecated group/name - [DEFAULT]/policy_default_rule +#policy_default_rule = default + +# Directories where policy configuration files are stored. They can be relative +# to any directory in the search path defined by the config_dir option, or +# absolute paths. The file defined by policy_file must exist for these +# directories to be searched. Missing or empty directories are ignored. (multi +# valued) +# Deprecated group/name - [DEFAULT]/policy_dirs +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#policy_dirs = policy.d + + +[oslo_reports] + +# +# From oslo.reports +# + +# Path to a log directory where to create a file (string value) +#log_dir = <None> + + +[profiler] + +# +# From cinder +# + +# If False fully disable profiling feature. (boolean value) +#profiler_enabled = false + +# If False doesn't trace SQL requests. (boolean value) +#trace_sqlalchemy = false + +[lvm] +iscsi_helper=lioadm +volume_group=cinder-volumes +iscsi_ip_address=VARINET4ADDR +volume_driver=cinder.volume.drivers.lvm.LVMVolumeDriver +volumes_dir=/var/lib/cinder/volumes +iscsi_protocol=iscsi +volume_backend_name=lvm + +[ceph] +volume_driver = cinder.volume.drivers.rbd.RBDDriver +rbd_pool = volumes +rbd_ceph_conf = /etc/ceph/ceph.conf +rbd_flatten_volume_from_snapshot = false +rbd_max_clone_depth = 5 +rbd_store_chunk_size = 4 +rados_connect_timeout = -1 +glance_api_version = 2 +rbd_user=cinder +rbd_secret_uuid=RBDSECRET diff --git a/qa/qa_scripts/openstack/files/glance-api.template.conf b/qa/qa_scripts/openstack/files/glance-api.template.conf new file mode 100644 index 000000000..956fb1bf2 --- /dev/null +++ b/qa/qa_scripts/openstack/files/glance-api.template.conf @@ -0,0 +1,1590 @@ +[DEFAULT] + +# +# From glance.api +# + +# When true, this option sets the owner of an image to be the tenant. +# Otherwise, the owner of the image will be the authenticated user +# issuing the request. 
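The [ceph] backend at the end of the cinder template above assumes a 'volumes' pool and a client.cinder key already exist on the cluster. A minimal check of that assumption with python-rados/python-rbd (where the keyring lives is left to the deployment; this is a sketch, not what the driver itself runs):

    import rados
    import rbd

    # Connect the way cinder's RBDDriver is configured to: rbd_user=cinder,
    # rbd_ceph_conf=/etc/ceph/ceph.conf, rbd_pool=volumes.
    cluster = rados.Rados(conffile='/etc/ceph/ceph.conf', name='client.cinder')
    cluster.connect()
    ioctx = cluster.open_ioctx('volumes')
    print(rbd.RBD().list(ioctx))   # images visible to client.cinder
    ioctx.close()
    cluster.shutdown()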
(boolean value) +#owner_is_tenant=true + +# Role used to identify an authenticated user as administrator. +# (string value) +#admin_role=admin + +# Allow unauthenticated users to access the API with read-only +# privileges. This only applies when using ContextMiddleware. (boolean +# value) +#allow_anonymous_access=false + +# Limits request ID length. (integer value) +#max_request_id_length=64 + +# Public url to use for versions endpoint. The default is None, which +# will use the request's host_url attribute to populate the URL base. +# If Glance is operating behind a proxy, you will want to change this +# to represent the proxy's URL. (string value) +#public_endpoint=<None> + +# Whether to allow users to specify image properties beyond what the +# image schema provides (boolean value) +#allow_additional_image_properties=true + +# Maximum number of image members per image. Negative values evaluate +# to unlimited. (integer value) +#image_member_quota=128 + +# Maximum number of properties allowed on an image. Negative values +# evaluate to unlimited. (integer value) +#image_property_quota=128 + +# Maximum number of tags allowed on an image. Negative values evaluate +# to unlimited. (integer value) +#image_tag_quota=128 + +# Maximum number of locations allowed on an image. Negative values +# evaluate to unlimited. (integer value) +#image_location_quota=10 + +# Python module path of data access API (string value) +#data_api=glance.db.sqlalchemy.api + +# Default value for the number of items returned by a request if not +# specified explicitly in the request (integer value) +#limit_param_default=25 + +# Maximum permissible number of items that could be returned by a +# request (integer value) +#api_limit_max=1000 + +# Whether to include the backend image storage location in image +# properties. Revealing storage location can be a security risk, so +# use this setting with caution! (boolean value) +#show_image_direct_url=false +show_image_direct_url=True + +# Whether to include the backend image locations in image properties. +# For example, if using the file system store a URL of +# "file:///path/to/image" will be returned to the user in the +# 'direct_url' meta-data field. Revealing storage location can be a +# security risk, so use this setting with caution! The overrides +# show_image_direct_url. (boolean value) +#show_multiple_locations=false + +# Maximum size of image a user can upload in bytes. Defaults to +# 1099511627776 bytes (1 TB).WARNING: this value should only be +# increased after careful consideration and must be set to a value +# under 8 EB (9223372036854775808). (integer value) +# Maximum value: 9223372036854775808 +#image_size_cap=1099511627776 + +# Set a system wide quota for every user. This value is the total +# capacity that a user can use across all storage systems. A value of +# 0 means unlimited.Optional unit can be specified for the value. +# Accepted units are B, KB, MB, GB and TB representing Bytes, +# KiloBytes, MegaBytes, GigaBytes and TeraBytes respectively. If no +# unit is specified then Bytes is assumed. Note that there should not +# be any space between value and unit and units are case sensitive. +# (string value) +#user_storage_quota=0 + +# Deploy the v1 OpenStack Images API. (boolean value) +#enable_v1_api=true + +# Deploy the v2 OpenStack Images API. (boolean value) +#enable_v2_api=true + +# Deploy the v3 OpenStack Objects API. (boolean value) +#enable_v3_api=false + +# Deploy the v1 OpenStack Registry API. 
(boolean value) +#enable_v1_registry=true + +# Deploy the v2 OpenStack Registry API. (boolean value) +#enable_v2_registry=true + +# The hostname/IP of the pydev process listening for debug connections +# (string value) +#pydev_worker_debug_host=<None> + +# The port on which a pydev process is listening for connections. +# (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#pydev_worker_debug_port=5678 + +# AES key for encrypting store 'location' metadata. This includes, if +# used, Swift or S3 credentials. Should be set to a random string of +# length 16, 24 or 32 bytes (string value) +#metadata_encryption_key=<None> + +# Digest algorithm which will be used for digital signature. Use the +# command "openssl list-message-digest-algorithms" to get the +# available algorithmssupported by the version of OpenSSL on the +# platform. Examples are "sha1", "sha256", "sha512", etc. (string +# value) +#digest_algorithm=sha256 + +# This value sets what strategy will be used to determine the image +# location order. Currently two strategies are packaged with Glance +# 'location_order' and 'store_type'. (string value) +# Allowed values: location_order, store_type +#location_strategy=location_order + +# The location of the property protection file.This file contains the +# rules for property protections and the roles/policies associated +# with it. If this config value is not specified, by default, property +# protections won't be enforced. If a value is specified and the file +# is not found, then the glance-api service will not start. (string +# value) +#property_protection_file=<None> + +# This config value indicates whether "roles" or "policies" are used +# in the property protection file. (string value) +# Allowed values: roles, policies +#property_protection_rule_format=roles + +# Modules of exceptions that are permitted to be recreated upon +# receiving exception data from an rpc call. (list value) +#allowed_rpc_exception_modules=glance.common.exception,exceptions + +# Address to bind the server. Useful when selecting a particular +# network interface. (string value) +#bind_host=0.0.0.0 +bind_host=0.0.0.0 + +# The port on which the server will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#bind_port=<None> +bind_port=9292 + +# The number of child process workers that will be created to service +# requests. The default will be equal to the number of CPUs available. +# (integer value) +#workers=4 +workers=12 + +# Maximum line size of message headers to be accepted. max_header_line +# may need to be increased when using large tokens (typically those +# generated by the Keystone v3 API with big service catalogs (integer +# value) +#max_header_line=16384 + +# If False, server will return the header "Connection: close", If +# True, server will return "Connection: Keep-Alive" in its responses. +# In order to close the client socket connection explicitly after the +# response is sent and read successfully by the client, you simply +# have to set this option to False when you create a wsgi server. +# (boolean value) +#http_keepalive=true + +# Timeout for client connections' socket operations. If an incoming +# connection is idle for this number of seconds it will be closed. A +# value of '0' means wait forever. (integer value) +#client_socket_timeout=900 + +# The backlog value that will be used when creating the TCP listener +# socket. (integer value) +#backlog=4096 +backlog=4096 + +# The value for the socket option TCP_KEEPIDLE. 
This is the time in +# seconds that the connection must be idle before TCP starts sending +# keepalive probes. (integer value) +#tcp_keepidle=600 + +# CA certificate file to use to verify connecting clients. (string +# value) +#ca_file=<None> + +# Certificate file to use when starting API server securely. (string +# value) +#cert_file=<None> + +# Private key file to use when starting API server securely. (string +# value) +#key_file=<None> + +# If False fully disable profiling feature. (boolean value) +#enabled=false + +# If False doesn't trace SQL requests. (boolean value) +#trace_sqlalchemy=false + +# The path to the sqlite file database that will be used for image +# cache management. (string value) +#image_cache_sqlite_db=cache.db + +# The driver to use for image cache management. (string value) +#image_cache_driver=sqlite + +# The upper limit (the maximum size of accumulated cache in bytes) +# beyond which pruner, if running, starts cleaning the images cache. +# (integer value) +#image_cache_max_size=10737418240 + +# The amount of time to let an image remain in the cache without being +# accessed. (integer value) +#image_cache_stall_time=86400 + +# Base directory that the Image Cache uses. (string value) +#image_cache_dir=/var/lib/glance/image-cache/ +image_cache_dir=/var/lib/glance/image-cache + +# Default publisher_id for outgoing notifications. (string value) +#default_publisher_id=image.localhost + +# List of disabled notifications. A notification can be given either +# as a notification type to disable a single event, or as a +# notification group prefix to disable all events within a group. +# Example: if this config option is set to ["image.create", +# "metadef_namespace"], then "image.create" notification will not be +# sent after image is created and none of the notifications for +# metadefinition namespaces will be sent. (list value) +#disabled_notifications = + +# Address to find the registry server. (string value) +#registry_host=0.0.0.0 +registry_host=0.0.0.0 + +# Port the registry server is listening on. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#registry_port=9191 +registry_port=9191 + +# Whether to pass through the user token when making requests to the +# registry. To prevent failures with token expiration during big files +# upload, it is recommended to set this parameter to False.If +# "use_user_token" is not in effect, then admin credentials can be +# specified. (boolean value) +#use_user_token=true + +# The administrators user name. If "use_user_token" is not in effect, +# then admin credentials can be specified. (string value) +#admin_user=%SERVICE_USER% + +# The administrators password. If "use_user_token" is not in effect, +# then admin credentials can be specified. (string value) +#admin_password=%SERVICE_PASSWORD% + +# The tenant name of the administrative user. If "use_user_token" is +# not in effect, then admin tenant name can be specified. (string +# value) +#admin_tenant_name=%SERVICE_TENANT_NAME% + +# The URL to the keystone service. If "use_user_token" is not in +# effect and using keystone auth, then URL of keystone can be +# specified. (string value) +#auth_url=<None> + +# The strategy to use for authentication. If "use_user_token" is not +# in effect, then auth strategy can be specified. (string value) +#auth_strategy=noauth + +# The region for the authentication service. If "use_user_token" is +# not in effect and using keystone auth, then region name can be +# specified. 
(string value) +#auth_region=<None> + +# The protocol to use for communication with the registry server. +# Either http or https. (string value) +#registry_client_protocol=http +registry_client_protocol=http + +# The path to the key file to use in SSL connections to the registry +# server, if any. Alternately, you may set the GLANCE_CLIENT_KEY_FILE +# environment variable to a filepath of the key file (string value) +#registry_client_key_file=<None> + +# The path to the cert file to use in SSL connections to the registry +# server, if any. Alternately, you may set the GLANCE_CLIENT_CERT_FILE +# environment variable to a filepath of the CA cert file (string +# value) +#registry_client_cert_file=<None> + +# The path to the certifying authority cert file to use in SSL +# connections to the registry server, if any. Alternately, you may set +# the GLANCE_CLIENT_CA_FILE environment variable to a filepath of the +# CA cert file. (string value) +#registry_client_ca_file=<None> + +# When using SSL in connections to the registry server, do not require +# validation via a certifying authority. This is the registry's +# equivalent of specifying --insecure on the command line using +# glanceclient for the API. (boolean value) +#registry_client_insecure=false + +# The period of time, in seconds, that the API server will wait for a +# registry request to complete. A value of 0 implies no timeout. +# (integer value) +#registry_client_timeout=600 + +# Whether to pass through headers containing user and tenant +# information when making requests to the registry. This allows the +# registry to use the context middleware without keystonemiddleware's +# auth_token middleware, removing calls to the keystone auth service. +# It is recommended that when using this option, secure communication +# between glance api and glance registry is ensured by means other +# than auth_token middleware. (boolean value) +#send_identity_headers=false + +# The amount of time in seconds to delay before performing a delete. +# (integer value) +#scrub_time=0 + +# The size of thread pool to be used for scrubbing images. The default +# is one, which signifies serial scrubbing. Any value above one +# indicates the max number of images that may be scrubbed in parallel. +# (integer value) +#scrub_pool_size=1 + +# Turn on/off delayed delete. (boolean value) +#delayed_delete=false + +# Role used to identify an authenticated user as administrator. +# (string value) +#admin_role=admin + +# Whether to pass through headers containing user and tenant +# information when making requests to the registry. This allows the +# registry to use the context middleware without keystonemiddleware's +# auth_token middleware, removing calls to the keystone auth service. +# It is recommended that when using this option, secure communication +# between glance api and glance registry is ensured by means other +# than auth_token middleware. (boolean value) +#send_identity_headers=false + +# +# From oslo.log +# + +# Print debugging output (set logging level to DEBUG instead of +# default INFO level). (boolean value) +#debug=False +debug=True + +# If set to false, will disable INFO logging level, making WARNING the +# default. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#verbose=True +verbose=True + +# The name of a logging configuration file. This file is appended to +# any existing logging configuration files. 
For details about logging +# configuration files, see the Python logging module documentation. +# (string value) +# Deprecated group/name - [DEFAULT]/log_config +#log_config_append=<None> + +# DEPRECATED. A logging.Formatter log message format string which may +# use any of the available logging.LogRecord attributes. This option +# is deprecated. Please use logging_context_format_string and +# logging_default_format_string instead. (string value) +#log_format=<None> + +# Format string for %%(asctime)s in log records. Default: %(default)s +# . (string value) +#log_date_format=%Y-%m-%d %H:%M:%S + +# (Optional) Name of log file to output to. If no default is set, +# logging will go to stdout. (string value) +# Deprecated group/name - [DEFAULT]/logfile +#log_file=/var/log/glance/api.log +log_file=/var/log/glance/api.log + +# (Optional) The base directory used for relative --log-file paths. +# (string value) +# Deprecated group/name - [DEFAULT]/logdir +#log_dir=<None> +log_dir=/var/log/glance + +# Use syslog for logging. Existing syslog format is DEPRECATED and +# will be changed later to honor RFC5424. (boolean value) +#use_syslog=false +use_syslog=False + +# (Optional) Enables or disables syslog rfc5424 format for logging. If +# enabled, prefixes the MSG part of the syslog message with APP-NAME +# (RFC5424). The format without the APP-NAME is deprecated in Kilo, +# and will be removed in Mitaka, along with this option. (boolean +# value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#use_syslog_rfc_format=true + +# Syslog facility to receive log lines. (string value) +#syslog_log_facility=LOG_USER +syslog_log_facility=LOG_USER + +# Log output to standard error. (boolean value) +#use_stderr=False +use_stderr=True + +# Format string to use for log messages with context. (string value) +#logging_context_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [%(request_id)s %(user_identity)s] %(instance)s%(message)s + +# Format string to use for log messages without context. (string +# value) +#logging_default_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s + +# Data to append to log format when level is DEBUG. (string value) +#logging_debug_format_suffix=%(funcName)s %(pathname)s:%(lineno)d + +# Prefix each line of exception output with this format. (string +# value) +#logging_exception_prefix=%(asctime)s.%(msecs)03d %(process)d ERROR %(name)s %(instance)s + +# List of logger=LEVEL pairs. (list value) +#default_log_levels=amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN + +# Enables or disables publication of error events. (boolean value) +#publish_errors=false + +# The format for an instance that is passed with the log message. +# (string value) +#instance_format="[instance: %(uuid)s] " + +# The format for an instance UUID that is passed with the log message. +# (string value) +#instance_uuid_format="[instance: %(uuid)s] " + +# Enables or disables fatal status of deprecations. (boolean value) +#fatal_deprecations=false + +# +# From oslo.messaging +# + +# Size of RPC connection pool. 
(integer value) +# Deprecated group/name - [DEFAULT]/rpc_conn_pool_size +#rpc_conn_pool_size=30 + +# ZeroMQ bind address. Should be a wildcard (*), an ethernet +# interface, or IP. The "host" option should point or resolve to this +# address. (string value) +#rpc_zmq_bind_address=* + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker=local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port=9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts=1 + +# Maximum number of ingress messages to locally buffer per topic. +# Default is unlimited. (integer value) +#rpc_zmq_topic_backlog=<None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir=/var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. +# Must match "host" option, if running Nova. (string value) +#rpc_zmq_host=localhost + +# Seconds to wait before a cast expires (TTL). Only supported by +# impl_zmq. (integer value) +#rpc_cast_timeout=30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq=300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl=600 + +# Size of executor thread pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_thread_pool_size +#executor_thread_pool_size=64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = +notification_driver =messaging + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group/name - [rpc_notifier2]/topics +#notification_topics=notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout=60 + +# A URL representing the messaging driver to use and its full +# configuration. If not set, we fall back to the rpc_backend option +# and driver specific configuration. (string value) +#transport_url=<None> + +# The messaging driver to use, defaults to rabbit. Other drivers +# include qpid and zmq. (string value) +#rpc_backend=rabbit + +# The default exchange under which topics are scoped. May be +# overridden by an exchange name specified in the transport_url +# option. (string value) +#control_exchange=openstack +hw_scsi_model=virtio-scsi +hw_disk_bus=scsi +hw_qemu_guest_agent=yes +os_require_quiesce=yes + +[database] + +# +# From oslo.db +# + +# The file name to use with SQLite. (string value) +# Deprecated group/name - [DEFAULT]/sqlite_db +#sqlite_db=oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group/name - [DEFAULT]/sqlite_synchronous +#sqlite_synchronous=true + +# The back end to use for the database. (string value) +# Deprecated group/name - [DEFAULT]/db_backend +#backend=sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. +# (string value) +# Deprecated group/name - [DEFAULT]/sql_connection +# Deprecated group/name - [DATABASE]/sql_connection +# Deprecated group/name - [sql]/connection +#connection=mysql://glance:glance@localhost/glance +connection=mysql+pymysql://glance:qum5net@VARINET4ADDR/glance + +# The SQLAlchemy connection string to use to connect to the slave +# database. (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including +# the default, overrides any server-set SQL mode. To use whatever SQL +# mode is set by the server configuration, set this to no value. 
+# Example: mysql_sql_mode= (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +# Deprecated group/name - [DEFAULT]/sql_idle_timeout +# Deprecated group/name - [DATABASE]/sql_idle_timeout +# Deprecated group/name - [sql]/idle_timeout +#idle_timeout=3600 +idle_timeout=3600 + +# Minimum number of SQL connections to keep open in a pool. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_min_pool_size +# Deprecated group/name - [DATABASE]/sql_min_pool_size +#min_pool_size=1 + +# Maximum number of SQL connections to keep open in a pool. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_max_pool_size +# Deprecated group/name - [DATABASE]/sql_max_pool_size +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to +# -1 to specify an infinite retry count. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_retries +# Deprecated group/name - [DATABASE]/sql_max_retries +#max_retries=10 + +# Interval between retries of opening a SQL connection. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_retry_interval +# Deprecated group/name - [DATABASE]/reconnect_interval +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_max_overflow +# Deprecated group/name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. +# (integer value) +# Deprecated group/name - [DEFAULT]/sql_connection_debug +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +# Deprecated group/name - [DEFAULT]/sql_connection_trace +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer +# value) +# Deprecated group/name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout=<None> + +# Enable the experimental use of database reconnect on connection +# lost. (boolean value) +#use_db_reconnect=false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval=1 + +# If True, increases the interval between retries of a database +# operation up to db_max_retry_interval. (boolean value) +#db_inc_retry_interval=true + +# If db_inc_retry_interval is set, the maximum seconds between retries +# of a database operation. (integer value) +#db_max_retry_interval=10 + +# Maximum retries in case of connection error or deadlock error before +# error is raised. Set to -1 to specify an infinite retry count. +# (integer value) +#db_max_retries=20 + +# +# From oslo.db.concurrency +# + +# Enable the experimental use of thread pooling for all DB API calls +# (boolean value) +# Deprecated group/name - [DEFAULT]/dbapi_use_tpool +#use_tpool=false + + +[glance_store] + +# +# From glance.store +# + +# List of stores enabled (list value) +#stores=file,http +stores=rbd +default_store=rbd + +# Default scheme to use to store image data. The scheme must be +# registered by one of the stores defined by the 'stores' config +# option. (string value) +#default_store=file + +# Minimum interval seconds to execute updating dynamic storage +# capabilities based on backend status then. It's not a periodic +# routine, the update logic will be executed only when interval +# seconds elapsed and an operation of store has triggered. The feature +# will be enabled only when the option value greater then zero. 
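The [database] connection string above uses the pymysql driver. A minimal sanity check of it, assuming SQLAlchemy and PyMySQL are installed and VARINET4ADDR has already been substituted:

    from sqlalchemy import create_engine, text

    # Same URL as the 'connection' override; pool_recycle mirrors idle_timeout.
    engine = create_engine(
        'mysql+pymysql://glance:qum5net@VARINET4ADDR/glance',
        pool_recycle=3600)
    with engine.connect() as conn:
        print(conn.execute(text('SELECT VERSION()')).scalar())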
+# (integer value) +#store_capabilities_update_min_interval=0 + +# +# From glance.store +# + +# Hostname or IP address of the instance to connect to, or a mongodb +# URI, or a list of hostnames / mongodb URIs. If host is an IPv6 +# literal it must be enclosed in '[' and ']' characters following the +# RFC2732 URL syntax (e.g. '[::1]' for localhost) (string value) +#mongodb_store_uri=<None> + +# Database to use (string value) +#mongodb_store_db=<None> + +# Images will be chunked into objects of this size (in megabytes). For +# best performance, this should be a power of two. (integer value) +#sheepdog_store_chunk_size=64 + +# Port of sheep daemon. (integer value) +#sheepdog_store_port=7000 + +# IP address of sheep daemon. (string value) +#sheepdog_store_address=localhost + +# RADOS images will be chunked into objects of this size (in +# megabytes). For best performance, this should be a power of two. +# (integer value) +rbd_store_chunk_size=8 + +# RADOS pool in which images are stored. (string value) +#rbd_store_pool=images +rbd_store_pool=images + +# RADOS user to authenticate as (only applicable if using Cephx. If +# <None>, a default will be chosen based on the client. section in +# rbd_store_ceph_conf) (string value) +rbd_store_user=glance + +# Ceph configuration file path. If <None>, librados will locate the +# default config. If using cephx authentication, this file should +# include a reference to the right keyring in a client.<USER> section +# (string value) +#rbd_store_ceph_conf=/etc/ceph/ceph.conf +rbd_store_ceph_conf=/etc/ceph/ceph.conf + +# Timeout value (in seconds) used when connecting to ceph cluster. If +# value <= 0, no timeout is set and default librados value is used. +# (integer value) +#rados_connect_timeout=0 + +# Directory to which the Filesystem backend store writes images. +# (string value) +#filesystem_store_datadir=/var/lib/glance/images/ + +# List of directories and its priorities to which the Filesystem +# backend store writes images. (multi valued) +#filesystem_store_datadirs = + +# The path to a file which contains the metadata to be returned with +# any location associated with this store. The file must contain a +# valid JSON object. The object should contain the keys 'id' and +# 'mountpoint'. The value for both keys should be 'string'. (string +# value) +#filesystem_store_metadata_file=<None> + +# The required permission for created image file. In this way the user +# other service used, e.g. Nova, who consumes the image could be the +# exclusive member of the group that owns the files created. Assigning +# it less then or equal to zero means don't change the default +# permission of the file. This value will be decoded as an octal +# digit. (integer value) +#filesystem_store_file_perm=0 + +# If True, swiftclient won't check for a valid SSL certificate when +# authenticating. (boolean value) +#swift_store_auth_insecure=false + +# A string giving the CA certificate file to use in SSL connections +# for verifying certs. (string value) +#swift_store_cacert=<None> + +# The region of the swift endpoint to be used for single tenant. This +# setting is only necessary if the tenant has multiple swift +# endpoints. (string value) +#swift_store_region=<None> + +# If set, the configured endpoint will be used. If None, the storage +# url from the auth response will be used. (string value) +#swift_store_endpoint=<None> + +# A string giving the endpoint type of the swift service to use +# (publicURL, adminURL or internalURL). 
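rbd_store_chunk_size above is given in megabytes and should stay a power of two: glance_store derives the RBD object "order" (log2 of the object size in bytes) from it when creating images. A quick check of what the value 8 works out to (plain arithmetic, no glance API involved):

    import math

    chunk_mb = 8                                   # rbd_store_chunk_size
    order = int(math.log(chunk_mb * 1024 * 1024, 2))
    print(order)                                   # 23 -> 8 MiB RADOS objects
    assert 2 ** order == chunk_mb * 1024 * 1024    # exact only for powers of two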
This setting is only used if +# swift_store_auth_version is 2. (string value) +#swift_store_endpoint_type=publicURL + +# A string giving the service type of the swift service to use. This +# setting is only used if swift_store_auth_version is 2. (string +# value) +#swift_store_service_type=object-store + +# Container within the account that the account should use for storing +# images in Swift when using single container mode. In multiple +# container mode, this will be the prefix for all containers. (string +# value) +#swift_store_container=glance + +# The size, in MB, that Glance will start chunking image files and do +# a large object manifest in Swift. (integer value) +#swift_store_large_object_size=5120 + +# The amount of data written to a temporary disk buffer during the +# process of chunking the image file. (integer value) +#swift_store_large_object_chunk_size=200 + +# A boolean value that determines if we create the container if it +# does not exist. (boolean value) +#swift_store_create_container_on_put=false + +# If set to True, enables multi-tenant storage mode which causes +# Glance images to be stored in tenant specific Swift accounts. +# (boolean value) +#swift_store_multi_tenant=false + +# When set to 0, a single-tenant store will only use one container to +# store all images. When set to an integer value between 1 and 32, a +# single-tenant store will use multiple containers to store images, +# and this value will determine how many containers are created.Used +# only when swift_store_multi_tenant is disabled. The total number of +# containers that will be used is equal to 16^N, so if this config +# option is set to 2, then 16^2=256 containers will be used to store +# images. (integer value) +#swift_store_multiple_containers_seed=0 + +# A list of tenants that will be granted read/write access on all +# Swift containers created by Glance in multi-tenant mode. (list +# value) +#swift_store_admin_tenants = + +# If set to False, disables SSL layer compression of https swift +# requests. Setting to False may improve performance for images which +# are already in a compressed format, eg qcow2. (boolean value) +#swift_store_ssl_compression=true + +# The number of times a Swift download will be retried before the +# request fails. (integer value) +#swift_store_retry_get_count=0 + +# The reference to the default swift account/backing store parameters +# to use for adding new images. (string value) +#default_swift_reference=ref1 + +# Version of the authentication service to use. Valid versions are 2 +# and 3 for keystone and 1 (deprecated) for swauth and rackspace. +# (deprecated - use "auth_version" in swift_store_config_file) (string +# value) +#swift_store_auth_version=2 + +# The address where the Swift authentication service is listening. +# (deprecated - use "auth_address" in swift_store_config_file) (string +# value) +#swift_store_auth_address=<None> + +# The user to authenticate against the Swift authentication service +# (deprecated - use "user" in swift_store_config_file) (string value) +#swift_store_user=<None> + +# Auth key for the user authenticating against the Swift +# authentication service. (deprecated - use "key" in +# swift_store_config_file) (string value) +#swift_store_key=<None> + +# The config file that has the swift account(s)configs. (string value) +#swift_store_config_file=<None> + +# ESX/ESXi or vCenter Server target system. The server value can be an +# IP address or a DNS name. 
(string value) +#vmware_server_host=<None> + +# Username for authenticating with VMware ESX/VC server. (string +# value) +#vmware_server_username=<None> + +# Password for authenticating with VMware ESX/VC server. (string +# value) +#vmware_server_password=<None> + +# DEPRECATED. Inventory path to a datacenter. If the +# vmware_server_host specified is an ESX/ESXi, the +# vmware_datacenter_path is optional. If specified, it should be "ha- +# datacenter". This option is deprecated in favor of vmware_datastores +# and will be removed in the Liberty release. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#vmware_datacenter_path=ha-datacenter + +# DEPRECATED. Datastore associated with the datacenter. This option is +# deprecated in favor of vmware_datastores and will be removed in the +# Liberty release. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#vmware_datastore_name=<None> + +# Number of times VMware ESX/VC server API must be retried upon +# connection related issues. (integer value) +#vmware_api_retry_count=10 + +# The interval used for polling remote tasks invoked on VMware ESX/VC +# server. (integer value) +#vmware_task_poll_interval=5 + +# The name of the directory where the glance images will be stored in +# the VMware datastore. (string value) +#vmware_store_image_dir=/openstack_glance + +# Allow to perform insecure SSL requests to ESX/VC. (boolean value) +#vmware_api_insecure=false + +# A list of datastores where the image can be stored. This option may +# be specified multiple times for specifying multiple datastores. +# Either one of vmware_datastore_name or vmware_datastores is +# required. The datastore name should be specified after its +# datacenter path, separated by ":". An optional weight may be given +# after the datastore name, separated again by ":". Thus, the required +# format becomes <datacenter_path>:<datastore_name>:<optional_weight>. +# When adding an image, the datastore with highest weight will be +# selected, unless there is not enough free space available in cases +# where the image size is already known. If no weight is given, it is +# assumed to be zero and the directory will be considered for +# selection last. If multiple datastores have the same weight, then +# the one with the most free space available is selected. (multi +# valued) +#vmware_datastores = + +# The host where the S3 server is listening. (string value) +#s3_store_host=<None> + +# The S3 query token access key. (string value) +#s3_store_access_key=<None> + +# The S3 query token secret key. (string value) +#s3_store_secret_key=<None> + +# The S3 bucket to be used to store the Glance data. (string value) +#s3_store_bucket=<None> + +# The local directory where uploads will be staged before they are +# transferred into S3. (string value) +#s3_store_object_buffer_dir=<None> + +# A boolean to determine if the S3 bucket should be created on upload +# if it does not exist or if an error should be returned to the user. +# (boolean value) +#s3_store_create_bucket_on_put=false + +# The S3 calling format used to determine the bucket. Either subdomain +# or path can be used. (string value) +#s3_store_bucket_url_format=subdomain + +# What size, in MB, should S3 start chunking image files and do a +# multipart upload in S3. (integer value) +#s3_store_large_object_size=100 + +# What multipart upload part size, in MB, should S3 use when uploading +# parts. 
The size must be greater than or equal to 5M. (integer value) +#s3_store_large_object_chunk_size=10 + +# The number of thread pools to perform a multipart upload in S3. +# (integer value) +#s3_store_thread_pools=10 + +# Enable the use of a proxy. (boolean value) +#s3_store_enable_proxy=false + +# Address or hostname for the proxy server. (string value) +#s3_store_proxy_host=<None> + +# The port to use when connecting over a proxy. (integer value) +#s3_store_proxy_port=8080 + +# The username to connect to the proxy. (string value) +#s3_store_proxy_user=<None> + +# The password to use when connecting over a proxy. (string value) +#s3_store_proxy_password=<None> + +# Info to match when looking for cinder in the service catalog. Format +# is : separated values of the form: +# <service_type>:<service_name>:<endpoint_type> (string value) +#cinder_catalog_info=volume:cinder:publicURL + +# Override service catalog lookup with template for cinder endpoint +# e.g. http://localhost:8776/v1/%(project_id)s (string value) +#cinder_endpoint_template=<None> + +# Region name of this node (string value) +#os_region_name=<None> +os_region_name=RegionOne + +# Location of ca certificates file to use for cinder client requests. +# (string value) +#cinder_ca_certificates_file=<None> + +# Number of cinderclient retries on failed http calls (integer value) +#cinder_http_retries=3 + +# Allow to perform insecure SSL requests to cinder (boolean value) +#cinder_api_insecure=false + + +[image_format] + +# +# From glance.api +# + +# Supported values for the 'container_format' image attribute (list +# value) +# Deprecated group/name - [DEFAULT]/container_formats +#container_formats=ami,ari,aki,bare,ovf,ova + +# Supported values for the 'disk_format' image attribute (list value) +# Deprecated group/name - [DEFAULT]/disk_formats +#disk_formats=ami,ari,aki,vhd,vmdk,raw,qcow2,vdi,iso + + +[keystone_authtoken] + +# +# From keystonemiddleware.auth_token +# + +# Complete public Identity API endpoint. (string value) +#auth_uri=<None> +auth_uri=http://VARINET4ADDR:5000/v2.0 + +# API version of the admin Identity API endpoint. (string value) +#auth_version=<None> + +# Do not handle authorization requests within the middleware, but +# delegate the authorization decision to downstream WSGI components. +# (boolean value) +#delay_auth_decision=false + +# Request timeout value for communicating with Identity API server. +# (integer value) +#http_connect_timeout=<None> + +# How many times are we trying to reconnect when communicating with +# Identity API Server. (integer value) +#http_request_max_retries=3 + +# Env key for the swift cache. (string value) +#cache=<None> + +# Required if identity server requires client certificate (string +# value) +#certfile=<None> + +# Required if identity server requires client certificate (string +# value) +#keyfile=<None> + +# A PEM encoded Certificate Authority to use when verifying HTTPs +# connections. Defaults to system CAs. (string value) +#cafile=<None> + +# Verify HTTPS connections. (boolean value) +#insecure=false + +# The region in which the identity server can be found. (string value) +#region_name=<None> + +# Directory used to cache files related to PKI tokens. (string value) +#signing_dir=<None> + +# Optionally specify a list of memcached server(s) to use for caching. +# If left undefined, tokens will instead be cached in-process. 
(list +# value) +# Deprecated group/name - [DEFAULT]/memcache_servers +#memcached_servers=<None> + +# In order to prevent excessive effort spent validating tokens, the +# middleware caches previously-seen tokens for a configurable duration +# (in seconds). Set to -1 to disable caching completely. (integer +# value) +#token_cache_time=300 + +# Determines the frequency at which the list of revoked tokens is +# retrieved from the Identity service (in seconds). A high number of +# revocation events combined with a low cache duration may +# significantly reduce performance. (integer value) +#revocation_cache_time=10 + +# (Optional) If defined, indicate whether token data should be +# authenticated or authenticated and encrypted. Acceptable values are +# MAC or ENCRYPT. If MAC, token data is authenticated (with HMAC) in +# the cache. If ENCRYPT, token data is encrypted and authenticated in +# the cache. If the value is not one of these options or empty, +# auth_token will raise an exception on initialization. (string value) +#memcache_security_strategy=<None> + +# (Optional, mandatory if memcache_security_strategy is defined) This +# string is used for key derivation. (string value) +#memcache_secret_key=<None> + +# (Optional) Number of seconds memcached server is considered dead +# before it is tried again. (integer value) +#memcache_pool_dead_retry=300 + +# (Optional) Maximum total number of open connections to every +# memcached server. (integer value) +#memcache_pool_maxsize=10 + +# (Optional) Socket timeout in seconds for communicating with a +# memcached server. (integer value) +#memcache_pool_socket_timeout=3 + +# (Optional) Number of seconds a connection to memcached is held +# unused in the pool before it is closed. (integer value) +#memcache_pool_unused_timeout=60 + +# (Optional) Number of seconds that an operation will wait to get a +# memcached client connection from the pool. (integer value) +#memcache_pool_conn_get_timeout=10 + +# (Optional) Use the advanced (eventlet safe) memcached client pool. +# The advanced pool will only work under python 2.x. (boolean value) +#memcache_use_advanced_pool=false + +# (Optional) Indicate whether to set the X-Service-Catalog header. If +# False, middleware will not ask for service catalog on token +# validation and will not set the X-Service-Catalog header. (boolean +# value) +#include_service_catalog=true + +# Used to control the use and type of token binding. Can be set to: +# "disabled" to not check token binding. "permissive" (default) to +# validate binding information if the bind type is of a form known to +# the server and ignore it if not. "strict" like "permissive" but if +# the bind type is unknown the token will be rejected. "required" any +# form of token binding is needed to be allowed. Finally the name of a +# binding method that must be present in tokens. (string value) +#enforce_token_bind=permissive + +# If true, the revocation list will be checked for cached tokens. This +# requires that PKI tokens are configured on the identity server. +# (boolean value) +#check_revocations_for_cached=false + +# Hash algorithms to use for hashing PKI tokens. This may be a single +# algorithm or multiple. The algorithms are those supported by Python +# standard hashlib.new(). The hashes will be tried in the order given, +# so put the preferred one first for performance. The result of the +# first hash will be stored in the cache. 
This will typically be set +# to multiple values only while migrating from a less secure algorithm +# to a more secure one. Once all the old tokens are expired this +# option should be set to a single value for better performance. (list +# value) +#hash_algorithms=md5 + +# Prefix to prepend at the beginning of the path. Deprecated, use +# identity_uri. (string value) +#auth_admin_prefix = + +# Host providing the admin Identity API endpoint. Deprecated, use +# identity_uri. (string value) +#auth_host=127.0.0.1 + +# Port of the admin Identity API endpoint. Deprecated, use +# identity_uri. (integer value) +#auth_port=35357 + +# Protocol of the admin Identity API endpoint (http or https). +# Deprecated, use identity_uri. (string value) +#auth_protocol=http + +# Complete admin Identity API endpoint. This should specify the +# unversioned root endpoint e.g. https://localhost:35357/ (string +# value) +#identity_uri=<None> +identity_uri=http://VARINET4ADDR:35357 + +# This option is deprecated and may be removed in a future release. +# Single shared secret with the Keystone configuration used for +# bootstrapping a Keystone installation, or otherwise bypassing the +# normal authentication process. This option should not be used, use +# `admin_user` and `admin_password` instead. (string value) +#admin_token=<None> + +# Service username. (string value) +#admin_user=<None> +admin_user=glance + +# Service user password. (string value) +#admin_password=<None> +admin_password=qum5net + +# Service tenant name. (string value) +#admin_tenant_name=admin +admin_tenant_name=services + + +[matchmaker_redis] + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host=127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port=6379 + +# Password for Redis server (optional). (string value) +#password=<None> + + +[matchmaker_ring] + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group/name - [DEFAULT]/matchmaker_ringfile +#ringfile=/etc/oslo/matchmaker_ring.json + + +[oslo_concurrency] + +# +# From oslo.concurrency +# + +# Enables or disables inter-process locks. (boolean value) +# Deprecated group/name - [DEFAULT]/disable_process_locking +#disable_process_locking=false + +# Directory to use for lock files. For security, the specified +# directory should only be writable by the user running the processes +# that need locking. Defaults to environment variable OSLO_LOCK_PATH. +# If external locks are used, a lock path must be set. 
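The [keystone_authtoken] service credentials above (auth_uri on port 5000, identity_uri on 35357) can be exercised directly against the Keystone v2.0 tokens API. A minimal sketch with the requests library, assuming the endpoints are reachable after substitution:

    import requests

    # Same username/password/tenant as the [keystone_authtoken] overrides.
    body = {'auth': {'tenantName': 'services',
                     'passwordCredentials': {'username': 'glance',
                                             'password': 'qum5net'}}}
    resp = requests.post('http://VARINET4ADDR:5000/v2.0/tokens', json=body)
    resp.raise_for_status()
    print('got token:', 'token' in resp.json().get('access', {}))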
(string value) +# Deprecated group/name - [DEFAULT]/lock_path +#lock_path=<None> + + +[oslo_messaging_amqp] + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group/name - [amqp1]/server_request_prefix +#server_request_prefix=exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group/name - [amqp1]/broadcast_prefix +#broadcast_prefix=broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group/name - [amqp1]/group_request_prefix +#group_request_prefix=unicast + +# Name for the AMQP container (string value) +# Deprecated group/name - [amqp1]/container_name +#container_name=<None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group/name - [amqp1]/idle_timeout +#idle_timeout=0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group/name - [amqp1]/trace +#trace=false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group/name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string +# value) +# Deprecated group/name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string +# value) +# Deprecated group/name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group/name - [amqp1]/ssl_key_password +#ssl_key_password=<None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group/name - [amqp1]/allow_insecure_clients +#allow_insecure_clients=false + + +[oslo_messaging_qpid] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues=false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour +# since oslo-incubator is to send two AMQP replies - first one with +# the payload, a second one to ensure the other have finish to send +# the payload. We are going to remove it in the N release, but we must +# keep backward compatible at the same time. This option provides such +# compatibility - it defaults to False in Liberty and can be turned on +# for early adopters with a new installations or for testing. Please +# note, that this option will be removed in the Mitaka release. +# (boolean value) +#send_single_reply=false + +# Qpid broker hostname. (string value) +# Deprecated group/name - [DEFAULT]/qpid_hostname +#qpid_hostname=localhost + +# Qpid broker port. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_port +#qpid_port=5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/qpid_hosts +#qpid_hosts=$qpid_hostname:$qpid_port + +# Username for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string +# value) +# Deprecated group/name - [DEFAULT]/qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. 
(integer value) +# Deprecated group/name - [DEFAULT]/qpid_heartbeat +#qpid_heartbeat=60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group/name - [DEFAULT]/qpid_protocol +#qpid_protocol=tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group/name - [DEFAULT]/qpid_tcp_nodelay +#qpid_tcp_nodelay=true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_receiver_capacity +#qpid_receiver_capacity=1 + +# The qpid topology version to use. Version 1 is what was originally +# used by impl_qpid. Version 2 includes some backwards-incompatible +# changes that allow broker federation to work. Users should update +# to version 2 when they are able to take everything down, as it +# requires a clean break. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_topology_version +#qpid_topology_version=1 + + +[oslo_messaging_rabbit] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues=false +amqp_durable_queues=False + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour +# since oslo-incubator is to send two AMQP replies - first one with +# the payload, a second one to ensure the other have finish to send +# the payload. We are going to remove it in the N release, but we must +# keep backward compatible at the same time. This option provides such +# compatibility - it defaults to False in Liberty and can be turned on +# for early adopters with a new installations or for testing. Please +# note, that this option will be removed in the Mitaka release. +# (boolean value) +#send_single_reply=false + +# SSL version to use (valid only if SSL enabled). Valid values are +# TLSv1 and SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be +# available on some distributions. (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_keyfile +#kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_certfile +#kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). +# (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_ca_certs +#kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer +# cancel notification. (floating point value) +# Deprecated group/name - [DEFAULT]/kombu_reconnect_delay +#kombu_reconnect_delay=1.0 + +# How long to wait before considering a reconnect attempt to have +# failed. This value should not be longer than rpc_response_timeout. +# (integer value) +#kombu_reconnect_timeout=60 + +# The RabbitMQ broker address where a single node is used. (string +# value) +# Deprecated group/name - [DEFAULT]/rabbit_host +#rabbit_host=localhost +rabbit_host=VARINET4ADDR + +# The RabbitMQ broker port where a single node is used. (integer +# value) +# Deprecated group/name - [DEFAULT]/rabbit_port +#rabbit_port=5672 +rabbit_port=5672 + +# RabbitMQ HA cluster host:port pairs. 
(list value) +# Deprecated group/name - [DEFAULT]/rabbit_hosts +#rabbit_hosts=$rabbit_host:$rabbit_port +rabbit_hosts=VARINET4ADDR:5672 + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_use_ssl +#rabbit_use_ssl=false +rabbit_use_ssl=False + +# The RabbitMQ userid. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_userid +#rabbit_userid=guest +rabbit_userid=guest + +# The RabbitMQ password. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_password +#rabbit_password=guest +rabbit_password=guest + +# The RabbitMQ login method. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_login_method +#rabbit_login_method=AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_virtual_host +#rabbit_virtual_host=/ +rabbit_virtual_host=/ + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval=1 + +# How long to backoff for between retries when connecting to RabbitMQ. +# (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_retry_backoff +#rabbit_retry_backoff=2 + +# Maximum number of RabbitMQ connection retries. Default is 0 +# (infinite retry count). (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_max_retries +#rabbit_max_retries=0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this +# option, you must wipe the RabbitMQ database. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_ha_queues +#rabbit_ha_queues=false +rabbit_ha_queues=False + +# Number of seconds after which the Rabbit broker is considered down +# if heartbeat's keep-alive fails (0 disable the heartbeat). +# EXPERIMENTAL (integer value) +#heartbeat_timeout_threshold=60 +heartbeat_timeout_threshold=0 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate=2 +heartbeat_rate=2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake +# (boolean value) +# Deprecated group/name - [DEFAULT]/fake_rabbit +#fake_rabbit=false +rabbit_notification_exchange=glance +rabbit_notification_topic=notifications + + +[oslo_policy] + +# +# From oslo.policy +# + +# The JSON file that defines policies. (string value) +# Deprecated group/name - [DEFAULT]/policy_file +#policy_file=policy.json + +# Default rule. Enforced when a requested rule is not found. (string +# value) +# Deprecated group/name - [DEFAULT]/policy_default_rule +#policy_default_rule=default + +# Directories where policy configuration files are stored. They can be +# relative to any directory in the search path defined by the +# config_dir option, or absolute paths. The file defined by +# policy_file must exist for these directories to be searched. +# Missing or empty directories are ignored. (multi valued) +# Deprecated group/name - [DEFAULT]/policy_dirs +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#policy_dirs=policy.d + + +[paste_deploy] + +# +# From glance.api +# + +# Partial name of a pipeline in your paste configuration file with the +# service name removed. For example, if your paste section name is +# [pipeline:glance-api-keystone] use the value "keystone" (string +# value) +#flavor=<None> +flavor=keystone + +# Name of the paste configuration file. (string value) +#config_file=/usr/share/glance/glance-api-dist-paste.ini + + +[store_type_location_strategy] + +# +# From glance.api +# + +# The store names to use to get store preference order. 
The name must +# be registered by one of the stores defined by the 'stores' config +# option. This option will be applied when you using 'store_type' +# option as image location strategy defined by the 'location_strategy' +# config option. (list value) +#store_type_preference = + + +[task] + +# +# From glance.api +# + +# Time in hours for which a task lives after, either succeeding or +# failing (integer value) +# Deprecated group/name - [DEFAULT]/task_time_to_live +#task_time_to_live=48 + +# Specifies which task executor to be used to run the task scripts. +# (string value) +#task_executor=taskflow + +# Work dir for asynchronous task operations. The directory set here +# will be used to operate over images - normally before they are +# imported in the destination store. When providing work dir, make +# sure enough space is provided for concurrent tasks to run +# efficiently without running out of space. A rough estimation can be +# done by multiplying the number of `max_workers` - or the N of +# workers running - by an average image size (e.g 500MB). The image +# size estimation should be done based on the average size in your +# deployment. Note that depending on the tasks running you may need to +# multiply this number by some factor depending on what the task does. +# For example, you may want to double the available size if image +# conversion is enabled. All this being said, remember these are just +# estimations and you should do them based on the worst case scenario +# and be prepared to act in case they were wrong. (string value) +#work_dir=<None> + + +[taskflow_executor] + +# +# From glance.api +# + +# The mode in which the engine will run. Can be 'serial' or +# 'parallel'. (string value) +# Allowed values: serial, parallel +#engine_mode=parallel + +# The number of parallel activities executed at the same time by the +# engine. The value can be greater than one when the engine mode is +# 'parallel'. (integer value) +# Deprecated group/name - [task]/eventlet_executor_pool_size +#max_workers=10 diff --git a/qa/qa_scripts/openstack/files/kilo.template.conf b/qa/qa_scripts/openstack/files/kilo.template.conf new file mode 100644 index 000000000..35d359c89 --- /dev/null +++ b/qa/qa_scripts/openstack/files/kilo.template.conf @@ -0,0 +1,1077 @@ +[general] + +# Path to a public key to install on servers. If a usable key has not +# been installed on the remote servers, the user is prompted for a +# password and this key is installed so the password will not be +# required again. +CONFIG_SSH_KEY=/root/.ssh/id_rsa.pub + +# Default password to be used everywhere (overridden by passwords set +# for individual services or users). +CONFIG_DEFAULT_PASSWORD= + +# Specify 'y' to install MariaDB. ['y', 'n'] +CONFIG_MARIADB_INSTALL=y + +# Specify 'y' to install OpenStack Image Service (glance). ['y', 'n'] +CONFIG_GLANCE_INSTALL=y + +# Specify 'y' to install OpenStack Block Storage (cinder). ['y', 'n'] +CONFIG_CINDER_INSTALL=y + +# Specify 'y' to install OpenStack Compute (nova). ['y', 'n'] +CONFIG_NOVA_INSTALL=y + +# Specify 'y' to install OpenStack Networking (neutron); otherwise, +# Compute Networking (nova) will be used. ['y', 'n'] +CONFIG_NEUTRON_INSTALL=y + +# Specify 'y' to install OpenStack Dashboard (horizon). ['y', 'n'] +CONFIG_HORIZON_INSTALL=y + +# Specify 'y' to install OpenStack Object Storage (swift). ['y', 'n'] +CONFIG_SWIFT_INSTALL=y + +# Specify 'y' to install OpenStack Metering (ceilometer). 
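
The [task] section of the Glance template above suggests sizing work_dir as max_workers times an average image size, doubled when image conversion is enabled. A back-of-the-envelope helper using only the example numbers from those comments (10 workers, 500 MB, factor 2); none of them are fixed requirements.

    # Rough work_dir estimate per the [task]/[taskflow_executor] guidance above.
    def work_dir_estimate_mb(max_workers=10, avg_image_mb=500, conversion_factor=2):
        """10 workers * 500 MB * 2 (conversion enabled) ~= 10000 MB."""
        return max_workers * avg_image_mb * conversion_factor

    print(work_dir_estimate_mb())  # 10000
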
['y', 'n'] +CONFIG_CEILOMETER_INSTALL=y + +# Specify 'y' to install OpenStack Data Processing (sahara). In case +# of sahara installation packstack also installs heat.['y', 'n'] +CONFIG_SAHARA_INSTALL=n + +# Specify 'y' to install OpenStack Orchestration (heat). ['y', 'n'] +CONFIG_HEAT_INSTALL=n + +# Specify 'y' to install OpenStack Database (trove) ['y', 'n'] +CONFIG_TROVE_INSTALL=n + +# Specify 'y' to install OpenStack Bare Metal Provisioning (ironic). +# ['y', 'n'] +CONFIG_IRONIC_INSTALL=n + +# Specify 'y' to install the OpenStack Client packages (command-line +# tools). An admin "rc" file will also be installed. ['y', 'n'] +CONFIG_CLIENT_INSTALL=y + +# Comma-separated list of NTP servers. Leave plain if Packstack +# should not install ntpd on instances. +CONFIG_NTP_SERVERS=clock.redhat.com + +# Specify 'y' to install Nagios to monitor OpenStack hosts. Nagios +# provides additional tools for monitoring the OpenStack environment. +# ['n'] +CONFIG_NAGIOS_INSTALL=n + +# Comma-separated list of servers to be excluded from the +# installation. This is helpful if you are running Packstack a second +# time with the same answer file and do not want Packstack to +# overwrite these server's configurations. Leave empty if you do not +# need to exclude any servers. +EXCLUDE_SERVERS= + +# Specify 'y' if you want to run OpenStack services in debug mode; +# otherwise, specify 'n'. ['y', 'n'] +CONFIG_DEBUG_MODE=y + +# Server on which to install OpenStack services specific to the +# controller role (for example, API servers or dashboard). +CONFIG_CONTROLLER_HOST=VARINET4ADDR + +# List the servers on which to install the Compute service. +CONFIG_COMPUTE_HOSTS=VARINET4ADDR + +# List of servers on which to install the network service such as +# Compute networking (nova network) or OpenStack Networking (neutron). +CONFIG_NETWORK_HOSTS=VARINET4ADDR + +# Specify 'y' if you want to use VMware vCenter as hypervisor and +# storage; otherwise, specify 'n'. ['y', 'n'] +CONFIG_VMWARE_BACKEND=n + +# Specify 'y' if you want to use unsupported parameters. This should +# be used only if you know what you are doing. Issues caused by using +# unsupported options will not be fixed before the next major release. +# ['y', 'n'] +CONFIG_UNSUPPORTED=n + +# Specify 'y' if you want to use subnet addresses (in CIDR format) +# instead of interface names in following options: +# CONFIG_NOVA_COMPUTE_PRIVIF, CONFIG_NOVA_NETWORK_PRIVIF, +# CONFIG_NOVA_NETWORK_PUBIF, CONFIG_NEUTRON_OVS_BRIDGE_IFACES, +# CONFIG_NEUTRON_LB_INTERFACE_MAPPINGS, CONFIG_NEUTRON_OVS_TUNNEL_IF. +# This is useful for cases when interface names are not same on all +# installation hosts. +CONFIG_USE_SUBNETS=n + +# IP address of the VMware vCenter server. +CONFIG_VCENTER_HOST= + +# User name for VMware vCenter server authentication. +CONFIG_VCENTER_USER= + +# Password for VMware vCenter server authentication. +CONFIG_VCENTER_PASSWORD= + +# Comma separated list of names of the VMware vCenter clusters. Note: +# if multiple clusters are specified each one is mapped to one +# compute, otherwise all computes are mapped to same cluster. +CONFIG_VCENTER_CLUSTER_NAMES= + +# (Unsupported!) Server on which to install OpenStack services +# specific to storage servers such as Image or Block Storage services. +CONFIG_STORAGE_HOST=VARINET4ADDR + +# (Unsupported!) Server on which to install OpenStack services +# specific to OpenStack Data Processing (sahara). +CONFIG_SAHARA_HOST=VARINET4ADDR + +# Specify 'y' to enable the EPEL repository (Extra Packages for +# Enterprise Linux). 
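
The answer file relies on VARINET4ADDR (and, further down, VARHOSTNAME) as placeholders for the node under test. Below is a minimal rendering sketch, assuming a plain string substitution is all that is required; the render_template helper and the output file name are illustrative, not the scripts shipped alongside these templates.

    # Illustrative rendering of a packstack answer file from this template.
    import socket

    def render_template(src, dst, ip, hostname):
        with open(src) as f:
            text = f.read()
        text = text.replace("VARINET4ADDR", ip).replace("VARHOSTNAME", hostname)
        with open(dst, "w") as f:
            f.write(text)

    if __name__ == "__main__":
        fqdn = socket.getfqdn()
        render_template("kilo.template.conf", "packstack-answers.txt",
                        socket.gethostbyname(fqdn), fqdn)
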
['y', 'n'] +CONFIG_USE_EPEL=n + +# Comma-separated list of URLs for any additional yum repositories, +# to use for installation. +CONFIG_REPO= + +# Specify 'y' to enable the RDO testing repository. ['y', 'n'] +CONFIG_ENABLE_RDO_TESTING=n + +# To subscribe each server with Red Hat Subscription Manager, include +# this with CONFIG_RH_PW. +CONFIG_RH_USER= + +# To subscribe each server to receive updates from a Satellite +# server, provide the URL of the Satellite server. You must also +# provide a user name (CONFIG_SATELLITE_USERNAME) and password +# (CONFIG_SATELLITE_PASSWORD) or an access key (CONFIG_SATELLITE_AKEY) +# for authentication. +CONFIG_SATELLITE_URL= + +# To subscribe each server with Red Hat Subscription Manager, include +# this with CONFIG_RH_USER. +CONFIG_RH_PW= + +# Specify 'y' to enable RHEL optional repositories. ['y', 'n'] +CONFIG_RH_OPTIONAL=y + +# HTTP proxy to use with Red Hat Subscription Manager. +CONFIG_RH_PROXY= + +# Port to use for Red Hat Subscription Manager's HTTP proxy. +CONFIG_RH_PROXY_PORT= + +# User name to use for Red Hat Subscription Manager's HTTP proxy. +CONFIG_RH_PROXY_USER= + +# Password to use for Red Hat Subscription Manager's HTTP proxy. +CONFIG_RH_PROXY_PW= + +# User name to authenticate with the RHN Satellite server; if you +# intend to use an access key for Satellite authentication, leave this +# blank. +CONFIG_SATELLITE_USER= + +# Password to authenticate with the RHN Satellite server; if you +# intend to use an access key for Satellite authentication, leave this +# blank. +CONFIG_SATELLITE_PW= + +# Access key for the Satellite server; if you intend to use a user +# name and password for Satellite authentication, leave this blank. +CONFIG_SATELLITE_AKEY= + +# Certificate path or URL of the certificate authority to verify that +# the connection with the Satellite server is secure. If you are not +# using Satellite in your deployment, leave this blank. +CONFIG_SATELLITE_CACERT= + +# Profile name that should be used as an identifier for the system in +# RHN Satellite (if required). +CONFIG_SATELLITE_PROFILE= + +# Comma-separated list of flags passed to the rhnreg_ks command. +# Valid flags are: novirtinfo, norhnsd, nopackages ['novirtinfo', +# 'norhnsd', 'nopackages'] +CONFIG_SATELLITE_FLAGS= + +# HTTP proxy to use when connecting to the RHN Satellite server (if +# required). +CONFIG_SATELLITE_PROXY= + +# User name to authenticate with the Satellite-server HTTP proxy. +CONFIG_SATELLITE_PROXY_USER= + +# User password to authenticate with the Satellite-server HTTP proxy. +CONFIG_SATELLITE_PROXY_PW= + +# Specify filepath for CA cert file. If CONFIG_SSL_CACERT_SELFSIGN is +# set to 'n' it has to be preexisting file. +CONFIG_SSL_CACERT_FILE=/etc/pki/tls/certs/selfcert.crt + +# Specify filepath for CA cert key file. If +# CONFIG_SSL_CACERT_SELFSIGN is set to 'n' it has to be preexisting +# file. +CONFIG_SSL_CACERT_KEY_FILE=/etc/pki/tls/private/selfkey.key + +# Enter the path to use to store generated SSL certificates in. +CONFIG_SSL_CERT_DIR=~/packstackca/ + +# Specify 'y' if you want Packstack to pregenerate the CA +# Certificate. +CONFIG_SSL_CACERT_SELFSIGN=y + +# Enter the selfsigned CAcert subject country. +CONFIG_SELFSIGN_CACERT_SUBJECT_C=-- + +# Enter the selfsigned CAcert subject state. +CONFIG_SELFSIGN_CACERT_SUBJECT_ST=State + +# Enter the selfsigned CAcert subject location. +CONFIG_SELFSIGN_CACERT_SUBJECT_L=City + +# Enter the selfsigned CAcert subject organization. 
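
With CONFIG_SSL_CACERT_SELFSIGN=y packstack generates the CA itself, and the subject fields configured here only feed that certificate. For reference, an equivalent self-signed pair can be produced with the openssl CLI; the snippet below drives it from Python purely as an illustration (the CN value and output names are assumptions, and packstack's own generation may differ).

    # Illustration only: self-signed cert/key with the subject fields set above.
    import subprocess

    subject = "/C=--/ST=State/L=City/O=openstack/OU=packstack/CN=localhost"  # CN assumed
    subprocess.run(
        ["openssl", "req", "-x509", "-nodes", "-newkey", "rsa:2048",
         "-keyout", "selfkey.key", "-out", "selfcert.crt",
         "-days", "365", "-subj", subject],
        check=True,
    )
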
+CONFIG_SELFSIGN_CACERT_SUBJECT_O=openstack + +# Enter the selfsigned CAcert subject organizational unit. +CONFIG_SELFSIGN_CACERT_SUBJECT_OU=packstack + +# Enter the selfsigned CAcert subject common name. +CONFIG_SELFSIGN_CACERT_SUBJECT_CN=VARHOSTNAME + +CONFIG_SELFSIGN_CACERT_SUBJECT_MAIL=admin@VARHOSTNAME + +# Service to be used as the AMQP broker. Allowed values are: qpid, +# rabbitmq ['qpid', 'rabbitmq'] +CONFIG_AMQP_BACKEND=rabbitmq + +# IP address of the server on which to install the AMQP service. +CONFIG_AMQP_HOST=VARINET4ADDR + +# Specify 'y' to enable SSL for the AMQP service. ['y', 'n'] +CONFIG_AMQP_ENABLE_SSL=n + +# Specify 'y' to enable authentication for the AMQP service. ['y', +# 'n'] +CONFIG_AMQP_ENABLE_AUTH=n + +# Password for the NSS certificate database of the AMQP service. +CONFIG_AMQP_NSS_CERTDB_PW=PW_PLACEHOLDER + +# User for AMQP authentication. +CONFIG_AMQP_AUTH_USER=amqp_user + +# Password for AMQP authentication. +CONFIG_AMQP_AUTH_PASSWORD=PW_PLACEHOLDER + +# IP address of the server on which to install MariaDB. If a MariaDB +# installation was not specified in CONFIG_MARIADB_INSTALL, specify +# the IP address of an existing database server (a MariaDB cluster can +# also be specified). +CONFIG_MARIADB_HOST=VARINET4ADDR + +# User name for the MariaDB administrative user. +CONFIG_MARIADB_USER=root + +# Password for the MariaDB administrative user. +CONFIG_MARIADB_PW=qum5net + +# Password to use for the Identity service (keystone) to access the +# database. +CONFIG_KEYSTONE_DB_PW=qum5net + +# Enter y if cron job for removing soft deleted DB rows should be +# created. +CONFIG_KEYSTONE_DB_PURGE_ENABLE=True + +# Default region name to use when creating tenants in the Identity +# service. +CONFIG_KEYSTONE_REGION=RegionOne + +# Token to use for the Identity service API. +CONFIG_KEYSTONE_ADMIN_TOKEN=9390caff845749c3ac74453eb4f384e2 + +# Email address for the Identity service 'admin' user. Defaults to +CONFIG_KEYSTONE_ADMIN_EMAIL=root@localhost + +# User name for the Identity service 'admin' user. Defaults to +# 'admin'. +CONFIG_KEYSTONE_ADMIN_USERNAME=admin + +# Password to use for the Identity service 'admin' user. +CONFIG_KEYSTONE_ADMIN_PW=qum5net + +# Password to use for the Identity service 'demo' user. +CONFIG_KEYSTONE_DEMO_PW=qum5net + +# Identity service API version string. ['v2.0', 'v3'] +CONFIG_KEYSTONE_API_VERSION=v2.0 + +# Identity service token format (UUID or PKI). The recommended format +# for new deployments is UUID. ['UUID', 'PKI'] +CONFIG_KEYSTONE_TOKEN_FORMAT=UUID + +# Name of service to use to run the Identity service (keystone or +# httpd). ['keystone', 'httpd'] +CONFIG_KEYSTONE_SERVICE_NAME=httpd + +# Type of Identity service backend (sql or ldap). ['sql', 'ldap'] +CONFIG_KEYSTONE_IDENTITY_BACKEND=sql + +# URL for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_URL=ldap://VARINET4ADDR + +# User DN for the Identity service LDAP backend. Used to bind to the +# LDAP server if the LDAP server does not allow anonymous +# authentication. +CONFIG_KEYSTONE_LDAP_USER_DN= + +# User DN password for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_PASSWORD= + +# Base suffix for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_SUFFIX= + +# Query scope for the Identity service LDAP backend. Use 'one' for +# onelevel/singleLevel or 'sub' for subtree/wholeSubtree ('base' is +# not actually used by the Identity service and is therefore +# deprecated). 
['base', 'one', 'sub'] +CONFIG_KEYSTONE_LDAP_QUERY_SCOPE=one + +# Query page size for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_PAGE_SIZE=-1 + +# User subtree for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_SUBTREE= + +# User query filter for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_FILTER= + +# User object class for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_OBJECTCLASS= + +# User ID attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_ID_ATTRIBUTE= + +# User name attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_NAME_ATTRIBUTE= + +# User email address attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_MAIL_ATTRIBUTE= + +# User-enabled attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_ENABLED_ATTRIBUTE= + +# Bit mask integer applied to user-enabled attribute for the Identity +# service LDAP backend. Indicate the bit that the enabled value is +# stored in if the LDAP server represents "enabled" as a bit on an +# integer rather than a boolean. A value of "0" indicates the mask is +# not used (default). If this is not set to "0", the typical value is +# "2", typically used when +# "CONFIG_KEYSTONE_LDAP_USER_ENABLED_ATTRIBUTE = userAccountControl". +CONFIG_KEYSTONE_LDAP_USER_ENABLED_MASK=-1 + +# Value of enabled attribute which indicates user is enabled for the +# Identity service LDAP backend. This should match an appropriate +# integer value if the LDAP server uses non-boolean (bitmask) values +# to indicate whether a user is enabled or disabled. If this is not +# set as 'y', the typical value is "512". This is typically used when +# "CONFIG_KEYSTONE_LDAP_USER_ENABLED_ATTRIBUTE = userAccountControl". +CONFIG_KEYSTONE_LDAP_USER_ENABLED_DEFAULT=TRUE + +# Specify 'y' if users are disabled (not enabled) in the Identity +# service LDAP backend (inverts boolean-enabled values). Some LDAP +# servers use a boolean lock attribute where "y" means an account is +# disabled. Setting this to 'y' allows these lock attributes to be +# used. This setting will have no effect if +# "CONFIG_KEYSTONE_LDAP_USER_ENABLED_MASK" is in use. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ENABLED_INVERT=n + +# Comma-separated list of attributes stripped from LDAP user entry +# upon update. +CONFIG_KEYSTONE_LDAP_USER_ATTRIBUTE_IGNORE= + +# Identity service LDAP attribute mapped to default_project_id for +# users. +CONFIG_KEYSTONE_LDAP_USER_DEFAULT_PROJECT_ID_ATTRIBUTE= + +# Specify 'y' if you want to be able to create Identity service users +# through the Identity service interface; specify 'n' if you will +# create directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ALLOW_CREATE=n + +# Specify 'y' if you want to be able to update Identity service users +# through the Identity service interface; specify 'n' if you will +# update directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ALLOW_UPDATE=n + +# Specify 'y' if you want to be able to delete Identity service users +# through the Identity service interface; specify 'n' if you will +# delete directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ALLOW_DELETE=n + +# Identity service LDAP attribute mapped to password. +CONFIG_KEYSTONE_LDAP_USER_PASS_ATTRIBUTE= + +# DN of the group entry to hold enabled LDAP users when using enabled +# emulation. 
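
The interplay of USER_ENABLED_MASK, USER_ENABLED_DEFAULT and USER_ENABLED_INVERT above is easy to misread. The sketch below is a simplified illustration of the usual interpretation (for example Active Directory's userAccountControl, where a set 0x2 bit means the account is disabled); it mirrors the option descriptions, not Keystone's exact code path.

    # Simplified illustration of the LDAP "enabled" decision; not Keystone's code.
    def ldap_user_enabled(attr_value, mask=0, enabled_default="TRUE", invert=False):
        if mask:  # e.g. mask=2 with userAccountControl: bit set => disabled
            return (int(attr_value) & mask) == 0
        enabled = str(attr_value).upper() == str(enabled_default).upper()
        return (not enabled) if invert else enabled

    print(ldap_user_enabled(512, mask=2))  # True: the 0x2 bit is clear
    print(ldap_user_enabled(514, mask=2))  # False: the disable bit is set
    print(ldap_user_enabled("TRUE"))       # True: plain boolean-style attribute
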
+CONFIG_KEYSTONE_LDAP_USER_ENABLED_EMULATION_DN= + +# List of additional LDAP attributes for mapping additional attribute +# mappings for users. The attribute-mapping format is +# <ldap_attr>:<user_attr>, where ldap_attr is the attribute in the +# LDAP entry and user_attr is the Identity API attribute. +CONFIG_KEYSTONE_LDAP_USER_ADDITIONAL_ATTRIBUTE_MAPPING= + +# Group subtree for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_SUBTREE= + +# Group query filter for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_FILTER= + +# Group object class for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_OBJECTCLASS= + +# Group ID attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_ID_ATTRIBUTE= + +# Group name attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_NAME_ATTRIBUTE= + +# Group member attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_MEMBER_ATTRIBUTE= + +# Group description attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_DESC_ATTRIBUTE= + +# Comma-separated list of attributes stripped from LDAP group entry +# upon update. +CONFIG_KEYSTONE_LDAP_GROUP_ATTRIBUTE_IGNORE= + +# Specify 'y' if you want to be able to create Identity service +# groups through the Identity service interface; specify 'n' if you +# will create directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_GROUP_ALLOW_CREATE=n + +# Specify 'y' if you want to be able to update Identity service +# groups through the Identity service interface; specify 'n' if you +# will update directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_GROUP_ALLOW_UPDATE=n + +# Specify 'y' if you want to be able to delete Identity service +# groups through the Identity service interface; specify 'n' if you +# will delete directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_GROUP_ALLOW_DELETE=n + +# List of additional LDAP attributes used for mapping additional +# attribute mappings for groups. The attribute=mapping format is +# <ldap_attr>:<group_attr>, where ldap_attr is the attribute in the +# LDAP entry and group_attr is the Identity API attribute. +CONFIG_KEYSTONE_LDAP_GROUP_ADDITIONAL_ATTRIBUTE_MAPPING= + +# Specify 'y' if the Identity service LDAP backend should use TLS. +# ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USE_TLS=n + +# CA certificate directory for Identity service LDAP backend (if TLS +# is used). +CONFIG_KEYSTONE_LDAP_TLS_CACERTDIR= + +# CA certificate file for Identity service LDAP backend (if TLS is +# used). +CONFIG_KEYSTONE_LDAP_TLS_CACERTFILE= + +# Certificate-checking strictness level for Identity service LDAP +# backend; valid options are: never, allow, demand. ['never', 'allow', +# 'demand'] +CONFIG_KEYSTONE_LDAP_TLS_REQ_CERT=demand + +# Password to use for the Image service (glance) to access the +# database. +CONFIG_GLANCE_DB_PW=qum5net + +# Password to use for the Image service to authenticate with the +# Identity service. +CONFIG_GLANCE_KS_PW=qum5net + +# Storage backend for the Image service (controls how the Image +# service stores disk images). Valid options are: file or swift +# (Object Storage). The Object Storage service must be enabled to use +# it as a working backend; otherwise, Packstack falls back to 'file'. +# ['file', 'swift'] +CONFIG_GLANCE_BACKEND=file + +# Password to use for the Block Storage service (cinder) to access +# the database. 
+CONFIG_CINDER_DB_PW=qum5net + +# Enter y if cron job for removing soft deleted DB rows should be +# created. +CONFIG_CINDER_DB_PURGE_ENABLE=True + +# Password to use for the Block Storage service to authenticate with +# the Identity service. +CONFIG_CINDER_KS_PW=qum5net + +# Storage backend to use for the Block Storage service; valid options +# are: lvm, gluster, nfs, vmdk, netapp. ['lvm', 'gluster', 'nfs', +# 'vmdk', 'netapp'] +CONFIG_CINDER_BACKEND=lvm + +# Specify 'y' to create the Block Storage volumes group. That is, +# Packstack creates a raw disk image in /var/lib/cinder, and mounts it +# using a loopback device. This should only be used for testing on a +# proof-of-concept installation of the Block Storage service (a file- +# backed volume group is not suitable for production usage). ['y', +# 'n'] +CONFIG_CINDER_VOLUMES_CREATE=y + +# Size of Block Storage volumes group. Actual volume size will be +# extended with 3% more space for VG metadata. Remember that the size +# of the volume group will restrict the amount of disk space that you +# can expose to Compute instances, and that the specified amount must +# be available on the device used for /var/lib/cinder. +CONFIG_CINDER_VOLUMES_SIZE=20G + +# A single or comma-separated list of Red Hat Storage (gluster) +# volume shares to mount. Example: 'ip-address:/vol-name', 'domain +# :/vol-name' +CONFIG_CINDER_GLUSTER_MOUNTS= + +# A single or comma-separated list of NFS exports to mount. Example: +# 'ip-address:/export-name' +CONFIG_CINDER_NFS_MOUNTS= + +# Administrative user account name used to access the NetApp storage +# system or proxy server. +CONFIG_CINDER_NETAPP_LOGIN= + +# Password for the NetApp administrative user account specified in +# the CONFIG_CINDER_NETAPP_LOGIN parameter. +CONFIG_CINDER_NETAPP_PASSWORD= + +# Hostname (or IP address) for the NetApp storage system or proxy +# server. +CONFIG_CINDER_NETAPP_HOSTNAME= + +# The TCP port to use for communication with the storage system or +# proxy. If not specified, Data ONTAP drivers will use 80 for HTTP and +# 443 for HTTPS; E-Series will use 8080 for HTTP and 8443 for HTTPS. +# Defaults to 80. +CONFIG_CINDER_NETAPP_SERVER_PORT=80 + +# Storage family type used on the NetApp storage system; valid +# options are ontap_7mode for using Data ONTAP operating in 7-Mode, +# ontap_cluster for using clustered Data ONTAP, or E-Series for NetApp +# E-Series. Defaults to ontap_cluster. ['ontap_7mode', +# 'ontap_cluster', 'eseries'] +CONFIG_CINDER_NETAPP_STORAGE_FAMILY=ontap_cluster + +# The transport protocol used when communicating with the NetApp +# storage system or proxy server. Valid values are http or https. +# Defaults to 'http'. ['http', 'https'] +CONFIG_CINDER_NETAPP_TRANSPORT_TYPE=http + +# Storage protocol to be used on the data path with the NetApp +# storage system; valid options are iscsi, fc, nfs. Defaults to nfs. +# ['iscsi', 'fc', 'nfs'] +CONFIG_CINDER_NETAPP_STORAGE_PROTOCOL=nfs + +# Quantity to be multiplied by the requested volume size to ensure +# enough space is available on the virtual storage server (Vserver) to +# fulfill the volume creation request. Defaults to 1.0. +CONFIG_CINDER_NETAPP_SIZE_MULTIPLIER=1.0 + +# Time period (in minutes) that is allowed to elapse after the image +# is last accessed, before it is deleted from the NFS image cache. 
+# When a cache-cleaning cycle begins, images in the cache that have +# not been accessed in the last M minutes, where M is the value of +# this parameter, are deleted from the cache to create free space on +# the NFS share. Defaults to 720. +CONFIG_CINDER_NETAPP_EXPIRY_THRES_MINUTES=720 + +# If the percentage of available space for an NFS share has dropped +# below the value specified by this parameter, the NFS image cache is +# cleaned. Defaults to 20. +CONFIG_CINDER_NETAPP_THRES_AVL_SIZE_PERC_START=20 + +# When the percentage of available space on an NFS share has reached +# the percentage specified by this parameter, the driver stops +# clearing files from the NFS image cache that have not been accessed +# in the last M minutes, where M is the value of the +# CONFIG_CINDER_NETAPP_EXPIRY_THRES_MINUTES parameter. Defaults to 60. +CONFIG_CINDER_NETAPP_THRES_AVL_SIZE_PERC_STOP=60 + +# Single or comma-separated list of NetApp NFS shares for Block +# Storage to use. Format: ip-address:/export-name. Defaults to ''. +CONFIG_CINDER_NETAPP_NFS_SHARES= + +# File with the list of available NFS shares. Defaults to +# '/etc/cinder/shares.conf'. +CONFIG_CINDER_NETAPP_NFS_SHARES_CONFIG=/etc/cinder/shares.conf + +# This parameter is only utilized when the storage protocol is +# configured to use iSCSI or FC. This parameter is used to restrict +# provisioning to the specified controller volumes. Specify the value +# of this parameter to be a comma separated list of NetApp controller +# volume names to be used for provisioning. Defaults to ''. +CONFIG_CINDER_NETAPP_VOLUME_LIST= + +# The vFiler unit on which provisioning of block storage volumes will +# be done. This parameter is only used by the driver when connecting +# to an instance with a storage family of Data ONTAP operating in +# 7-Mode Only use this parameter when utilizing the MultiStore feature +# on the NetApp storage system. Defaults to ''. +CONFIG_CINDER_NETAPP_VFILER= + +# The name of the config.conf stanza for a Data ONTAP (7-mode) HA +# partner. This option is only used by the driver when connecting to +# an instance with a storage family of Data ONTAP operating in 7-Mode, +# and it is required if the storage protocol selected is FC. Defaults +# to ''. +CONFIG_CINDER_NETAPP_PARTNER_BACKEND_NAME= + +# This option specifies the virtual storage server (Vserver) name on +# the storage cluster on which provisioning of block storage volumes +# should occur. Defaults to ''. +CONFIG_CINDER_NETAPP_VSERVER= + +# Restricts provisioning to the specified controllers. Value must be +# a comma-separated list of controller hostnames or IP addresses to be +# used for provisioning. This option is only utilized when the storage +# family is configured to use E-Series. Defaults to ''. +CONFIG_CINDER_NETAPP_CONTROLLER_IPS= + +# Password for the NetApp E-Series storage array. Defaults to ''. +CONFIG_CINDER_NETAPP_SA_PASSWORD= + +# This option is used to define how the controllers in the E-Series +# storage array will work with the particular operating system on the +# hosts that are connected to it. Defaults to 'linux_dm_mp' +CONFIG_CINDER_NETAPP_ESERIES_HOST_TYPE=linux_dm_mp + +# Path to the NetApp E-Series proxy application on a proxy server. +# The value is combined with the value of the +# CONFIG_CINDER_NETAPP_TRANSPORT_TYPE, CONFIG_CINDER_NETAPP_HOSTNAME, +# and CONFIG_CINDER_NETAPP_HOSTNAME options to create the URL used by +# the driver to connect to the proxy application. Defaults to +# '/devmgr/v2'. 
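
The three NetApp cache options above cooperate: entries older than the expiry threshold are candidates for removal, cleaning begins once free space on the NFS share drops below the START percentage, and it stops again when free space climbs back to the STOP percentage. A small decision helper restating that behaviour, for illustration only:

    # Illustration of how the NetApp NFS image-cache thresholds interact.
    def should_clean_cache(free_pct, cleaning, start_pct=20, stop_pct=60):
        """Start below start_pct free space; keep cleaning until stop_pct is reached."""
        if cleaning:
            return free_pct < stop_pct
        return free_pct < start_pct

    print(should_clean_cache(15, cleaning=False))  # True  -> begin a cleaning cycle
    print(should_clean_cache(40, cleaning=True))   # True  -> still below the stop threshold
    print(should_clean_cache(65, cleaning=True))   # False -> enough space reclaimed
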
+CONFIG_CINDER_NETAPP_WEBSERVICE_PATH=/devmgr/v2 + +# Restricts provisioning to the specified storage pools. Only dynamic +# disk pools are currently supported. The value must be a comma- +# separated list of disk pool names to be used for provisioning. +# Defaults to ''. +CONFIG_CINDER_NETAPP_STORAGE_POOLS= + +# Password to use for OpenStack Bare Metal Provisioning (ironic) to +# access the database. +CONFIG_IRONIC_DB_PW=PW_PLACEHOLDER + +# Password to use for OpenStack Bare Metal Provisioning to +# authenticate with the Identity service. +CONFIG_IRONIC_KS_PW=PW_PLACEHOLDER + +# Enter y if cron job for removing soft deleted DB rows should be +# created. +CONFIG_NOVA_DB_PURGE_ENABLE=True + +# Password to use for the Compute service (nova) to access the +# database. +CONFIG_NOVA_DB_PW=qum5net + +# Password to use for the Compute service to authenticate with the +# Identity service. +CONFIG_NOVA_KS_PW=qum5net + +# Overcommitment ratio for virtual to physical CPUs. Specify 1.0 to +# disable CPU overcommitment. +CONFIG_NOVA_SCHED_CPU_ALLOC_RATIO=16.0 + +# Overcommitment ratio for virtual to physical RAM. Specify 1.0 to +# disable RAM overcommitment. +CONFIG_NOVA_SCHED_RAM_ALLOC_RATIO=1.5 + +# Protocol used for instance migration. Valid options are: tcp and +# ssh. Note that by default, the Compute user is created with the +# /sbin/nologin shell so that the SSH protocol will not work. To make +# the SSH protocol work, you must configure the Compute user on +# compute hosts manually. ['tcp', 'ssh'] +CONFIG_NOVA_COMPUTE_MIGRATE_PROTOCOL=tcp + +# Manager that runs the Compute service. +CONFIG_NOVA_COMPUTE_MANAGER=nova.compute.manager.ComputeManager + +# PEM encoded certificate to be used for ssl on the https server, +# leave blank if one should be generated, this certificate should not +# require a passphrase. If CONFIG_HORIZON_SSL is set to 'n' this +# parameter is ignored. +CONFIG_VNC_SSL_CERT= + +# SSL keyfile corresponding to the certificate if one was entered. If +# CONFIG_HORIZON_SSL is set to 'n' this parameter is ignored. +CONFIG_VNC_SSL_KEY= + +# Enter the PCI passthrough array of hash in JSON style for +# controller eg. [{"vendor_id":"1234", "product_id":"5678", +# "name":"default"}, {...}] +CONFIG_NOVA_PCI_ALIAS= + +# Enter the PCI passthrough whitelist array of hash in JSON style for +# controller eg. [{"vendor_id":"1234", "product_id":"5678", +# "name':"default"}, {...}] +CONFIG_NOVA_PCI_PASSTHROUGH_WHITELIST= + +# Private interface for flat DHCP on the Compute servers. +CONFIG_NOVA_COMPUTE_PRIVIF= + +# Compute Network Manager. ['^nova\.network\.manager\.\w+Manager$'] +CONFIG_NOVA_NETWORK_MANAGER=nova.network.manager.FlatDHCPManager + +# Public interface on the Compute network server. +CONFIG_NOVA_NETWORK_PUBIF=eth0 + +# Private interface for flat DHCP on the Compute network server. +CONFIG_NOVA_NETWORK_PRIVIF= + +# IP Range for flat DHCP. ['^[\:\.\da-fA-f]+(\/\d+){0,1}$'] +CONFIG_NOVA_NETWORK_FIXEDRANGE=192.168.32.0/22 + +# IP Range for floating IP addresses. ['^[\:\.\da- +# fA-f]+(\/\d+){0,1}$'] +CONFIG_NOVA_NETWORK_FLOATRANGE=10.3.4.0/22 + +# Specify 'y' to automatically assign a floating IP to new instances. +# ['y', 'n'] +CONFIG_NOVA_NETWORK_AUTOASSIGNFLOATINGIP=n + +# First VLAN for private networks (Compute networking). +CONFIG_NOVA_NETWORK_VLAN_START=100 + +# Number of networks to support (Compute networking). +CONFIG_NOVA_NETWORK_NUMBER=1 + +# Number of addresses in each private subnet (Compute networking). 
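
The FlatDHCP ranges above (192.168.32.0/22 fixed, 10.3.4.0/22 floating) can be sanity-checked with the standard ipaddress module; a quick, illustrative check:

    # Sanity check of the FlatDHCP ranges configured above.
    import ipaddress

    fixed = ipaddress.ip_network("192.168.32.0/22")
    floating = ipaddress.ip_network("10.3.4.0/22")

    print(fixed.num_addresses)       # 1024 addresses in the fixed range
    print(floating.num_addresses)    # 1024 addresses in the floating range
    print(fixed.overlaps(floating))  # False: the two pools should not overlap
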
+CONFIG_NOVA_NETWORK_SIZE=255 + +# Password to use for OpenStack Networking (neutron) to authenticate +# with the Identity service. +CONFIG_NEUTRON_KS_PW=qum5net + +# The password to use for OpenStack Networking to access the +# database. +CONFIG_NEUTRON_DB_PW=qum5net + +# The name of the Open vSwitch bridge (or empty for linuxbridge) for +# the OpenStack Networking L3 agent to use for external traffic. +# Specify 'provider' if you intend to use a provider network to handle +# external traffic. +CONFIG_NEUTRON_L3_EXT_BRIDGE=br-ex + +# Password for the OpenStack Networking metadata agent. +CONFIG_NEUTRON_METADATA_PW=qum5net + +# Specify 'y' to install OpenStack Networking's Load-Balancing- +# as-a-Service (LBaaS). ['y', 'n'] +CONFIG_LBAAS_INSTALL=n + +# Specify 'y' to install OpenStack Networking's L3 Metering agent +# ['y', 'n'] +CONFIG_NEUTRON_METERING_AGENT_INSTALL=n + +# Specify 'y' to configure OpenStack Networking's Firewall- +# as-a-Service (FWaaS). ['y', 'n'] +CONFIG_NEUTRON_FWAAS=n + +# Specify 'y' to configure OpenStack Networking's VPN-as-a-Service +# (VPNaaS). ['y', 'n'] +CONFIG_NEUTRON_VPNAAS=n + +# Comma-separated list of network-type driver entry points to be +# loaded from the neutron.ml2.type_drivers namespace. ['local', +# 'flat', 'vlan', 'gre', 'vxlan'] +CONFIG_NEUTRON_ML2_TYPE_DRIVERS=vxlan + +# Comma-separated, ordered list of network types to allocate as +# tenant networks. The 'local' value is only useful for single-box +# testing and provides no connectivity between hosts. ['local', +# 'vlan', 'gre', 'vxlan'] +CONFIG_NEUTRON_ML2_TENANT_NETWORK_TYPES=vxlan + +# Comma-separated ordered list of networking mechanism driver entry +# points to be loaded from the neutron.ml2.mechanism_drivers +# namespace. ['logger', 'test', 'linuxbridge', 'openvswitch', +# 'hyperv', 'ncs', 'arista', 'cisco_nexus', 'mlnx', 'l2population', +# 'sriovnicswitch'] +CONFIG_NEUTRON_ML2_MECHANISM_DRIVERS=openvswitch + +# Comma-separated list of physical_network names with which flat +# networks can be created. Use * to allow flat networks with arbitrary +# physical_network names. +CONFIG_NEUTRON_ML2_FLAT_NETWORKS=* + +# Comma-separated list of <physical_network>:<vlan_min>:<vlan_max> or +# <physical_network> specifying physical_network names usable for VLAN +# provider and tenant networks, as well as ranges of VLAN tags on each +# available for allocation to tenant networks. +CONFIG_NEUTRON_ML2_VLAN_RANGES= + +# Comma-separated list of <tun_min>:<tun_max> tuples enumerating +# ranges of GRE tunnel IDs that are available for tenant-network +# allocation. A tuple must be an array with tun_max +1 - tun_min > +# 1000000. +CONFIG_NEUTRON_ML2_TUNNEL_ID_RANGES= + +# Comma-separated list of addresses for VXLAN multicast group. If +# left empty, disables VXLAN from sending allocate broadcast traffic +# (disables multicast VXLAN mode). Should be a Multicast IP (v4 or v6) +# address. +CONFIG_NEUTRON_ML2_VXLAN_GROUP= + +# Comma-separated list of <vni_min>:<vni_max> tuples enumerating +# ranges of VXLAN VNI IDs that are available for tenant network +# allocation. Minimum value is 0 and maximum value is 16777215. +CONFIG_NEUTRON_ML2_VNI_RANGES=10:100 + +# Name of the L2 agent to be used with OpenStack Networking. +# ['linuxbridge', 'openvswitch'] +CONFIG_NEUTRON_L2_AGENT=openvswitch + +# Comma separated list of supported PCI vendor devices defined by +# vendor_id:product_id according to the PCI ID Repository. 
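
CONFIG_NEUTRON_ML2_VNI_RANGES above uses the <vni_min>:<vni_max> list format with VNIs limited to 0..16777215. A small parser that mirrors that description, illustrative only (neutron performs its own validation):

    # Parse a "<vni_min>:<vni_max>[,...]" value such as "10:100".
    def parse_vni_ranges(value, lo=0, hi=16777215):
        ranges = []
        for chunk in value.split(","):
            vmin, vmax = (int(x) for x in chunk.split(":"))
            if not (lo <= vmin <= vmax <= hi):
                raise ValueError("invalid VNI range %r" % chunk)
            ranges.append((vmin, vmax))
        return ranges

    print(parse_vni_ranges("10:100"))  # [(10, 100)]
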
+CONFIG_NEUTRON_ML2_SUPPORTED_PCI_VENDOR_DEVS=['15b3:1004', '8086:10ca'] + +# Specify 'y' if the sriov agent is required +CONFIG_NEUTRON_ML2_SRIOV_AGENT_REQUIRED=n + +# Comma-separated list of interface mappings for the OpenStack +# Networking ML2 SRIOV agent. Each tuple in the list must be in the +# format <physical_network>:<net_interface>. Example: +# physnet1:eth1,physnet2:eth2,physnet3:eth3. +CONFIG_NEUTRON_ML2_SRIOV_INTERFACE_MAPPINGS= + +# Comma-separated list of interface mappings for the OpenStack +# Networking linuxbridge plugin. Each tuple in the list must be in the +# format <physical_network>:<net_interface>. Example: +# physnet1:eth1,physnet2:eth2,physnet3:eth3. +CONFIG_NEUTRON_LB_INTERFACE_MAPPINGS= + +# Comma-separated list of bridge mappings for the OpenStack +# Networking Open vSwitch plugin. Each tuple in the list must be in +# the format <physical_network>:<ovs_bridge>. Example: physnet1:br- +# eth1,physnet2:br-eth2,physnet3:br-eth3 +CONFIG_NEUTRON_OVS_BRIDGE_MAPPINGS= + +# Comma-separated list of colon-separated Open vSwitch +# <bridge>:<interface> pairs. The interface will be added to the +# associated bridge. If you desire the bridge to be persistent a value +# must be added to this directive, also +# CONFIG_NEUTRON_OVS_BRIDGE_MAPPINGS must be set in order to create +# the proper port. This can be achieved from the command line by +# issuing the following command: packstack --allinone --os-neutron- +# ovs-bridge-mappings=ext-net:br-ex --os-neutron-ovs-bridge-interfaces +# =br-ex:eth0 +CONFIG_NEUTRON_OVS_BRIDGE_IFACES= + +# Interface for the Open vSwitch tunnel. Packstack overrides the IP +# address used for tunnels on this hypervisor to the IP found on the +# specified interface (for example, eth1). +CONFIG_NEUTRON_OVS_TUNNEL_IF= + +# VXLAN UDP port. +CONFIG_NEUTRON_OVS_VXLAN_UDP_PORT=4789 + +# Specify 'y' to set up Horizon communication over https. ['y', 'n'] +CONFIG_HORIZON_SSL=n + +# Secret key to use for Horizon Secret Encryption Key. +CONFIG_HORIZON_SECRET_KEY=e2ba54f295f84d0c8d645de8e36fcc33 + +# PEM-encoded certificate to be used for SSL connections on the https +# server. To generate a certificate, leave blank. +CONFIG_HORIZON_SSL_CERT= + +# SSL keyfile corresponding to the certificate if one was specified. +# The certificate should not require a passphrase. +CONFIG_HORIZON_SSL_KEY= + +CONFIG_HORIZON_SSL_CACERT= + +# Password to use for the Object Storage service to authenticate with +# the Identity service. +CONFIG_SWIFT_KS_PW=qum5net + +# Comma-separated list of devices to use as storage device for Object +# Storage. Each entry must take the format /path/to/dev (for example, +# specifying /dev/vdb installs /dev/vdb as the Object Storage storage +# device; Packstack does not create the filesystem, you must do this +# first). If left empty, Packstack creates a loopback device for test +# setup. +CONFIG_SWIFT_STORAGES= + +# Number of Object Storage storage zones; this number MUST be no +# larger than the number of configured storage devices. +CONFIG_SWIFT_STORAGE_ZONES=1 + +# Number of Object Storage storage replicas; this number MUST be no +# larger than the number of configured storage zones. +CONFIG_SWIFT_STORAGE_REPLICAS=1 + +# File system type for storage nodes. ['xfs', 'ext4'] +CONFIG_SWIFT_STORAGE_FSTYPE=ext4 + +# Custom seed number to use for swift_hash_path_suffix in +# /etc/swift/swift.conf. If you do not provide a value, a seed number +# is automatically generated. 
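
The Object Storage settings above impose two ordering constraints (zones no larger than the number of devices, replicas no larger than the number of zones), and the hash suffix is a 16-hex-character seed when left to be generated. A short illustration of both, assuming an empty CONFIG_SWIFT_STORAGES means a single loopback device:

    # Mirror the swift layout constraints stated above and generate a seed of the
    # same shape as the example value (16 hex characters).
    import secrets

    def check_swift_layout(num_devices, zones, replicas):
        assert zones <= max(num_devices, 1), "more zones than storage devices"
        assert replicas <= zones, "more replicas than zones"

    check_swift_layout(num_devices=0, zones=1, replicas=1)  # all-in-one loopback case
    print(secrets.token_hex(8))                             # a fresh 16-char hex seed
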
+CONFIG_SWIFT_HASH=54760d6b88814b53 + +# Size of the Object Storage loopback file storage device. +CONFIG_SWIFT_STORAGE_SIZE=2G + +# Password used by Orchestration service user to authenticate against +# the database. +CONFIG_HEAT_DB_PW=PW_PLACEHOLDER + +# Encryption key to use for authentication in the Orchestration +# database (16, 24, or 32 chars). +CONFIG_HEAT_AUTH_ENC_KEY=2e06ca7c4aa3400c + +# Password to use for the Orchestration service to authenticate with +# the Identity service. +CONFIG_HEAT_KS_PW=PW_PLACEHOLDER + +# Specify 'y' to install the Orchestration CloudWatch API. ['y', 'n'] +CONFIG_HEAT_CLOUDWATCH_INSTALL=n + +# Specify 'y' to install the Orchestration CloudFormation API. ['y', +# 'n'] +CONFIG_HEAT_CFN_INSTALL=n + +# Name of the Identity domain for Orchestration. +CONFIG_HEAT_DOMAIN=heat + +# Name of the Identity domain administrative user for Orchestration. +CONFIG_HEAT_DOMAIN_ADMIN=heat_admin + +# Password for the Identity domain administrative user for +# Orchestration. +CONFIG_HEAT_DOMAIN_PASSWORD=PW_PLACEHOLDER + +# Specify 'y' to provision for demo usage and testing. ['y', 'n'] +CONFIG_PROVISION_DEMO=y + +# Specify 'y' to configure the OpenStack Integration Test Suite +# (tempest) for testing. The test suite requires OpenStack Networking +# to be installed. ['y', 'n'] +CONFIG_PROVISION_TEMPEST=n + +# CIDR network address for the floating IP subnet. +CONFIG_PROVISION_DEMO_FLOATRANGE=172.24.4.224/28 + +# The name to be assigned to the demo image in Glance (default +# "cirros"). +CONFIG_PROVISION_IMAGE_NAME=cirros + +# A URL or local file location for an image to download and provision +# in Glance (defaults to a URL for a recent "cirros" image). +CONFIG_PROVISION_IMAGE_URL=http://download.cirros-cloud.net/0.3.3/cirros-0.3.3-x86_64-disk.img + +# Format for the demo image (default "qcow2"). +CONFIG_PROVISION_IMAGE_FORMAT=qcow2 + +# User to use when connecting to instances booted from the demo +# image. +CONFIG_PROVISION_IMAGE_SSH_USER=cirros + +# Name of the Integration Test Suite provisioning user. If you do not +# provide a user name, Tempest is configured in a standalone mode. +CONFIG_PROVISION_TEMPEST_USER= + +# Password to use for the Integration Test Suite provisioning user. +CONFIG_PROVISION_TEMPEST_USER_PW=PW_PLACEHOLDER + +# CIDR network address for the floating IP subnet. +CONFIG_PROVISION_TEMPEST_FLOATRANGE=172.24.4.224/28 + +# URI of the Integration Test Suite git repository. +CONFIG_PROVISION_TEMPEST_REPO_URI=https://github.com/openstack/tempest.git + +# Revision (branch) of the Integration Test Suite git repository. +CONFIG_PROVISION_TEMPEST_REPO_REVISION=master + +# Specify 'y' to configure the Open vSwitch external bridge for an +# all-in-one deployment (the L3 external bridge acts as the gateway +# for virtual machines). ['y', 'n'] +CONFIG_PROVISION_OVS_BRIDGE=y + +# Password to use for OpenStack Data Processing (sahara) to access +# the database. +CONFIG_SAHARA_DB_PW=PW_PLACEHOLDER + +# Password to use for OpenStack Data Processing to authenticate with +# the Identity service. +CONFIG_SAHARA_KS_PW=PW_PLACEHOLDER + +# Secret key for signing Telemetry service (ceilometer) messages. +CONFIG_CEILOMETER_SECRET=d1cd21accf764049 + +# Password to use for Telemetry to authenticate with the Identity +# service. +CONFIG_CEILOMETER_KS_PW=qum5net + +# Backend driver for Telemetry's group membership coordination. +# ['redis', 'none'] +CONFIG_CEILOMETER_COORDINATION_BACKEND=redis + +# IP address of the server on which to install MongoDB. 
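
CONFIG_HEAT_AUTH_ENC_KEY above must be 16, 24 or 32 characters (the AES key sizes); the template ships a 16-character hex value. Generating a fresh key of the same shape is trivial with the stdlib, shown here only as an illustration:

    # Generate a Heat auth encryption key of a valid length (16/24/32 characters).
    import secrets

    key = secrets.token_hex(8)  # 16 hex characters, matching the length used above
    assert len(key) in (16, 24, 32)
    print(key)
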
+CONFIG_MONGODB_HOST=VARINET4ADDR + +# IP address of the server on which to install the Redis master +# server. +CONFIG_REDIS_MASTER_HOST=VARINET4ADDR + +# Port on which the Redis server(s) listens. +CONFIG_REDIS_PORT=6379 + +# Specify 'y' to have Redis try to use HA. ['y', 'n'] +CONFIG_REDIS_HA=n + +# Hosts on which to install Redis slaves. +CONFIG_REDIS_SLAVE_HOSTS= + +# Hosts on which to install Redis sentinel servers. +CONFIG_REDIS_SENTINEL_HOSTS= + +# Host to configure as the Redis coordination sentinel. +CONFIG_REDIS_SENTINEL_CONTACT_HOST= + +# Port on which Redis sentinel servers listen. +CONFIG_REDIS_SENTINEL_PORT=26379 + +# Quorum value for Redis sentinel servers. +CONFIG_REDIS_SENTINEL_QUORUM=2 + +# Name of the master server watched by the Redis sentinel. ['[a-z]+'] +CONFIG_REDIS_MASTER_NAME=mymaster + +# Password to use for OpenStack Database-as-a-Service (trove) to +# access the database. +CONFIG_TROVE_DB_PW=PW_PLACEHOLDER + +# Password to use for OpenStack Database-as-a-Service to authenticate +# with the Identity service. +CONFIG_TROVE_KS_PW=PW_PLACEHOLDER + +# User name to use when OpenStack Database-as-a-Service connects to +# the Compute service. +CONFIG_TROVE_NOVA_USER=trove + +# Tenant to use when OpenStack Database-as-a-Service connects to the +# Compute service. +CONFIG_TROVE_NOVA_TENANT=services + +# Password to use when OpenStack Database-as-a-Service connects to +# the Compute service. +CONFIG_TROVE_NOVA_PW=PW_PLACEHOLDER + +# Password of the nagiosadmin user on the Nagios server. +CONFIG_NAGIOS_PW=PW_PLACEHOLDER diff --git a/qa/qa_scripts/openstack/files/nova.template.conf b/qa/qa_scripts/openstack/files/nova.template.conf new file mode 100644 index 000000000..c63c8648f --- /dev/null +++ b/qa/qa_scripts/openstack/files/nova.template.conf @@ -0,0 +1,3698 @@ +[DEFAULT] + +# +# From nova +# + +# Number of times to retry live-migration before failing. If == -1, try until +# out of hosts. If == 0, only try once, no retries. 
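
The migrate_max_retries comment that opens this nova [DEFAULT] section uses the usual convention: -1 retries until the candidate hosts are exhausted, 0 makes a single attempt, and N allows N retries. A tiny illustration of that convention (the attempts_allowed helper is hypothetical, not nova code):

    # Illustrative reading of the migrate_max_retries convention described above.
    def attempts_allowed(max_retries, num_hosts):
        if max_retries == -1:      # try until out of hosts
            return num_hosts
        return 1 + max_retries     # 0 => exactly one attempt

    print(attempts_allowed(-1, num_hosts=5))  # 5
    print(attempts_allowed(0, num_hosts=5))   # 1
    print(attempts_allowed(3, num_hosts=5))   # 4
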
(integer value) +#migrate_max_retries=-1 + +# The topic console auth proxy nodes listen on (string value) +#consoleauth_topic=consoleauth + +# The driver to use for database access (string value) +#db_driver=nova.db + +# Backend to use for IPv6 generation (string value) +#ipv6_backend=rfc2462 + +# The driver for servicegroup service (valid options are: db, zk, mc) (string +# value) +#servicegroup_driver=db + +# The availability_zone to show internal services under (string value) +#internal_service_availability_zone=internal +internal_service_availability_zone=internal + +# Default compute node availability_zone (string value) +#default_availability_zone=nova +default_availability_zone=nova + +# The topic cert nodes listen on (string value) +#cert_topic=cert + +# Image ID used when starting up a cloudpipe vpn server (string value) +#vpn_image_id=0 + +# Flavor for vpn instances (string value) +#vpn_flavor=m1.tiny + +# Template for cloudpipe instance boot script (string value) +#boot_script_template=$pybasedir/nova/cloudpipe/bootscript.template + +# Network to push into openvpn config (string value) +#dmz_net=10.0.0.0 + +# Netmask to push into openvpn config (string value) +#dmz_mask=255.255.255.0 + +# Suffix to add to project name for vpn key and secgroups (string value) +#vpn_key_suffix=-vpn + +# Record sessions to FILE.[session_number] (boolean value) +#record=false + +# Become a daemon (background process) (boolean value) +#daemon=false + +# Disallow non-encrypted connections (boolean value) +#ssl_only=false + +# Source is ipv6 (boolean value) +#source_is_ipv6=false + +# SSL certificate file (string value) +#cert=self.pem + +# SSL key file (if separate from cert) (string value) +#key=<None> + +# Run webserver on same port. Serve files from DIR. (string value) +#web=/usr/share/spice-html5 + +# Host on which to listen for incoming requests (string value) +#novncproxy_host=0.0.0.0 +novncproxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#novncproxy_port=6080 +novncproxy_port=6080 + +# Host on which to listen for incoming requests (string value) +#serialproxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#serialproxy_port=6083 + +# Host on which to listen for incoming requests (string value) +#html5proxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#html5proxy_port=6082 + +# Driver to use for the console proxy (string value) +#console_driver=nova.console.xvp.XVPConsoleProxy + +# Stub calls to compute worker for tests (boolean value) +#stub_compute=false + +# Publicly visible name for this console host (string value) +#console_public_hostname=x86-017.build.eng.bos.redhat.com + +# The topic console proxy nodes listen on (string value) +#console_topic=console + +# XVP conf template (string value) +#console_xvp_conf_template=$pybasedir/nova/console/xvp.conf.template + +# Generated XVP conf file (string value) +#console_xvp_conf=/etc/xvp.conf + +# XVP master process pid file (string value) +#console_xvp_pid=/var/run/xvp.pid + +# XVP log file (string value) +#console_xvp_log=/var/log/xvp.log + +# Port for XVP to multiplex VNC connections on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#console_xvp_multiplex_port=5900 + +# How many seconds before deleting tokens (integer value) +#console_token_ttl=600 + +# Filename of root CA (string value) 
+#ca_file=cacert.pem + +# Filename of private key (string value) +#key_file=private/cakey.pem + +# Filename of root Certificate Revocation List (string value) +#crl_file=crl.pem + +# Where we keep our keys (string value) +#keys_path=$state_path/keys + +# Where we keep our root CA (string value) +#ca_path=$state_path/CA + +# Should we use a CA for each project? (boolean value) +#use_project_ca=false + +# Subject for certificate for users, %s for project, user, timestamp (string +# value) +#user_cert_subject=/C=US/ST=California/O=OpenStack/OU=NovaDev/CN=%.16s-%.16s-%s + +# Subject for certificate for projects, %s for project, timestamp (string +# value) +#project_cert_subject=/C=US/ST=California/O=OpenStack/OU=NovaDev/CN=project-ca-%.16s-%s + +# Services to be added to the available pool on create (boolean value) +#enable_new_services=true + +# Template string to be used to generate instance names (string value) +#instance_name_template=instance-%08x + +# Template string to be used to generate snapshot names (string value) +#snapshot_name_template=snapshot-%s + +# When set, compute API will consider duplicate hostnames invalid within the +# specified scope, regardless of case. Should be empty, "project" or "global". +# (string value) +#osapi_compute_unique_server_name_scope = + +# Make exception message format errors fatal (boolean value) +#fatal_exception_format_errors=false + +# Parent directory for tempdir used for image decryption (string value) +#image_decryption_dir=/tmp + +# Hostname or IP for OpenStack to use when accessing the S3 api (string value) +#s3_host=$my_ip + +# Port used when accessing the S3 api (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#s3_port=3333 + +# Access key to use for S3 server for images (string value) +#s3_access_key=notchecked + +# Secret key to use for S3 server for images (string value) +#s3_secret_key=notchecked + +# Whether to use SSL when talking to S3 (boolean value) +#s3_use_ssl=false + +# Whether to affix the tenant id to the access key when downloading from S3 +# (boolean value) +#s3_affix_tenant=false + +# IP address of this host (string value) +#my_ip=10.16.48.92 + +# Block storage IP address of this host (string value) +#my_block_storage_ip=$my_ip + +# Name of this node. This can be an opaque identifier. It is not necessarily +# a hostname, FQDN, or IP address. However, the node name must be valid within +# an AMQP key, and if using ZeroMQ, a valid hostname, FQDN, or IP address +# (string value) +#host=x86-017.build.eng.bos.redhat.com + +# Use IPv6 (boolean value) +#use_ipv6=false +use_ipv6=False + +# If set, send compute.instance.update notifications on instance state changes. +# Valid values are None for no notifications, "vm_state" for notifications on +# VM state changes, or "vm_and_task_state" for notifications on VM and task +# state changes. (string value) +#notify_on_state_change=<None> + +# If set, send api.fault notifications on caught exceptions in the API service. +# (boolean value) +#notify_api_faults=false +notify_api_faults=False + +# Default notification level for outgoing notifications (string value) +# Allowed values: DEBUG, INFO, WARN, ERROR, CRITICAL +#default_notification_level=INFO + +# Default publisher_id for outgoing notifications (string value) +#default_publisher_id=<None> + +# DEPRECATED: THIS VALUE SHOULD BE SET WHEN CREATING THE NETWORK. If True in +# multi_host mode, all compute hosts share the same dhcp address. 
The same IP +# address used for DHCP will be added on each nova-network node which is only +# visible to the vms on the same host. (boolean value) +#share_dhcp_address=false + +# DEPRECATED: THIS VALUE SHOULD BE SET WHEN CREATING THE NETWORK. MTU setting +# for network interface. (integer value) +#network_device_mtu=<None> + +# Path to S3 buckets (string value) +#buckets_path=$state_path/buckets + +# IP address for S3 API to listen (string value) +#s3_listen=0.0.0.0 + +# Port for S3 API to listen (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#s3_listen_port=3333 + +# Directory where the nova python module is installed (string value) +#pybasedir=/builddir/build/BUILD/nova-12.0.2 + +# Directory where nova binaries are installed (string value) +#bindir=/usr/local/bin + +# Top-level directory for maintaining nova's state (string value) +#state_path=/var/lib/nova +state_path=/var/lib/nova + +# An alias for a PCI passthrough device requirement. This allows users to +# specify the alias in the extra_spec for a flavor, without needing to repeat +# all the PCI property requirements. For example: pci_alias = { "name": +# "QuickAssist", "product_id": "0443", "vendor_id": "8086", +# "device_type": "ACCEL" } defines an alias for the Intel QuickAssist card. +# (multi valued) (multi valued) +#pci_alias = + +# White list of PCI devices available to VMs. For example: +# pci_passthrough_whitelist = [{"vendor_id": "8086", "product_id": "0443"}] +# (multi valued) +#pci_passthrough_whitelist = + +# Number of instances allowed per project (integer value) +#quota_instances=10 + +# Number of instance cores allowed per project (integer value) +#quota_cores=20 + +# Megabytes of instance RAM allowed per project (integer value) +#quota_ram=51200 + +# Number of floating IPs allowed per project (integer value) +#quota_floating_ips=10 + +# Number of fixed IPs allowed per project (this should be at least the number +# of instances allowed) (integer value) +#quota_fixed_ips=-1 + +# Number of metadata items allowed per instance (integer value) +#quota_metadata_items=128 + +# Number of injected files allowed (integer value) +#quota_injected_files=5 + +# Number of bytes allowed per injected file (integer value) +#quota_injected_file_content_bytes=10240 + +# Length of injected file path (integer value) +#quota_injected_file_path_length=255 + +# Number of security groups per project (integer value) +#quota_security_groups=10 + +# Number of security rules per security group (integer value) +#quota_security_group_rules=20 + +# Number of key pairs per user (integer value) +#quota_key_pairs=100 + +# Number of server groups per project (integer value) +#quota_server_groups=10 + +# Number of servers per server group (integer value) +#quota_server_group_members=10 + +# Number of seconds until a reservation expires (integer value) +#reservation_expire=86400 + +# Count of reservations until usage is refreshed. This defaults to 0(off) to +# avoid additional load but it is useful to turn on to help keep quota usage up +# to date and reduce the impact of out of sync usage issues. (integer value) +#until_refresh=0 + +# Number of seconds between subsequent usage refreshes. This defaults to 0(off) +# to avoid additional load but it is useful to turn on to help keep quota usage +# up to date and reduce the impact of out of sync usage issues. 
Note that +# quotas are not updated on a periodic task, they will update on a new +# reservation if max_age has passed since the last reservation (integer value) +#max_age=0 + +# Default driver to use for quota checks (string value) +#quota_driver=nova.quota.DbQuotaDriver + +# Seconds between nodes reporting state to datastore (integer value) +#report_interval=10 +report_interval=10 + +# Enable periodic tasks (boolean value) +#periodic_enable=true + +# Range of seconds to randomly delay when starting the periodic task scheduler +# to reduce stampeding. (Disable by setting to 0) (integer value) +#periodic_fuzzy_delay=60 + +# A list of APIs to enable by default (list value) +#enabled_apis=ec2,osapi_compute,metadata +enabled_apis=ec2,osapi_compute,metadata + +# A list of APIs with enabled SSL (list value) +#enabled_ssl_apis = + +# The IP address on which the EC2 API will listen. (string value) +#ec2_listen=0.0.0.0 +ec2_listen=0.0.0.0 + +# The port on which the EC2 API will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#ec2_listen_port=8773 +ec2_listen_port=8773 + +# Number of workers for EC2 API service. The default will be equal to the +# number of CPUs available. (integer value) +#ec2_workers=<None> +ec2_workers=12 + +# The IP address on which the OpenStack API will listen. (string value) +#osapi_compute_listen=0.0.0.0 +osapi_compute_listen=0.0.0.0 + +# The port on which the OpenStack API will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#osapi_compute_listen_port=8774 +osapi_compute_listen_port=8774 + +# Number of workers for OpenStack API service. The default will be the number +# of CPUs available. (integer value) +#osapi_compute_workers=<None> +osapi_compute_workers=12 + +# OpenStack metadata service manager (string value) +#metadata_manager=nova.api.manager.MetadataManager + +# The IP address on which the metadata API will listen. (string value) +#metadata_listen=0.0.0.0 +metadata_listen=0.0.0.0 + +# The port on which the metadata API will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#metadata_listen_port=8775 +metadata_listen_port=8775 + +# Number of workers for metadata service. The default will be the number of +# CPUs available. 
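
The *_workers options above default to the host CPU count; this rendered template pins them to 12, and each API keeps its own listen port (8773/8774/8775). A brief illustration of both points:

    # Workers default to the CPU count; the template overrides them with 12.
    import multiprocessing

    print(multiprocessing.cpu_count())  # default worker count on this host

    ports = {"ec2": 8773, "osapi_compute": 8774, "metadata": 8775}
    assert len(set(ports.values())) == len(ports)  # each API listens on its own port
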
(integer value) +#metadata_workers=<None> +metadata_workers=12 + +# Full class name for the Manager for compute (string value) +#compute_manager=nova.compute.manager.ComputeManager +compute_manager=nova.compute.manager.ComputeManager + +# Full class name for the Manager for console proxy (string value) +#console_manager=nova.console.manager.ConsoleProxyManager + +# Manager for console auth (string value) +#consoleauth_manager=nova.consoleauth.manager.ConsoleAuthManager + +# Full class name for the Manager for cert (string value) +#cert_manager=nova.cert.manager.CertManager + +# Full class name for the Manager for network (string value) +#network_manager=nova.network.manager.FlatDHCPManager + +# Full class name for the Manager for scheduler (string value) +#scheduler_manager=nova.scheduler.manager.SchedulerManager + +# Maximum time since last check-in for up service (integer value) +#service_down_time=60 +service_down_time=60 + +# Whether to log monkey patching (boolean value) +#monkey_patch=false + +# List of modules/decorators to monkey patch (list value) +#monkey_patch_modules=nova.api.ec2.cloud:nova.notifications.notify_decorator,nova.compute.api:nova.notifications.notify_decorator + +# Length of generated instance admin passwords (integer value) +#password_length=12 + +# Time period to generate instance usages for. Time period must be hour, day, +# month or year (string value) +#instance_usage_audit_period=month + +# Start and use a daemon that can run the commands that need to be run with +# root privileges. This option is usually enabled on nodes that run nova +# compute processes (boolean value) +#use_rootwrap_daemon=false + +# Path to the rootwrap configuration file to use for running commands as root +# (string value) +#rootwrap_config=/etc/nova/rootwrap.conf +rootwrap_config=/etc/nova/rootwrap.conf + +# Explicitly specify the temporary working directory (string value) +#tempdir=<None> + +# Port that the XCP VNC proxy should bind to (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#xvpvncproxy_port=6081 + +# Address that the XCP VNC proxy should bind to (string value) +#xvpvncproxy_host=0.0.0.0 + +# The full class name of the volume API class to use (string value) +#volume_api_class=nova.volume.cinder.API +volume_api_class=nova.volume.cinder.API + +# File name for the paste.deploy config for nova-api (string value) +#api_paste_config=api-paste.ini +api_paste_config=api-paste.ini + +# A python format string that is used as the template to generate log lines. +# The following values can be formatted into it: client_ip, date_time, +# request_line, status_code, body_length, wall_seconds. (string value) +#wsgi_log_format=%(client_ip)s "%(request_line)s" status: %(status_code)s len: %(body_length)s time: %(wall_seconds).7f + +# The HTTP header used to determine the scheme for the original request, even +# if it was removed by an SSL terminating proxy. Typical value is +# "HTTP_X_FORWARDED_PROTO". (string value) +#secure_proxy_ssl_header=<None> + +# CA certificate file to use to verify connecting clients (string value) +#ssl_ca_file=<None> + +# SSL certificate of API server (string value) +#ssl_cert_file=<None> + +# SSL private key of API server (string value) +#ssl_key_file=<None> + +# Sets the value of TCP_KEEPIDLE in seconds for each server socket. Not +# supported on OS X. (integer value) +#tcp_keepidle=600 + +# Size of the pool of greenthreads used by wsgi (integer value) +#wsgi_default_pool_size=1000 + +# Maximum line size of message headers to be accepted. 
max_header_line may need +# to be increased when using large tokens (typically those generated by the +# Keystone v3 API with big service catalogs). (integer value) +#max_header_line=16384 + +# If False, closes the client socket connection explicitly. (boolean value) +#wsgi_keep_alive=true + +# Timeout for client connections' socket operations. If an incoming connection +# is idle for this number of seconds it will be closed. A value of '0' means +# wait forever. (integer value) +#client_socket_timeout=900 + +# +# From nova.api +# + +# File to load JSON formatted vendor data from (string value) +#vendordata_jsonfile_path=<None> + +# Permit instance snapshot operations. (boolean value) +#allow_instance_snapshots=true + +# Whether to use per-user rate limiting for the api. This option is only used +# by v2 api. Rate limiting is removed from v2.1 api. (boolean value) +#api_rate_limit=false + +# +# The strategy to use for auth: keystone or noauth2. noauth2 is designed for +# testing only, as it does no actual credential checking. noauth2 provides +# administrative credentials only if 'admin' is specified as the username. +# (string value) +#auth_strategy=keystone +auth_strategy=keystone + +# Treat X-Forwarded-For as the canonical remote address. Only enable this if +# you have a sanitizing proxy. (boolean value) +#use_forwarded_for=false +use_forwarded_for=False + +# The IP address of the EC2 API server (string value) +#ec2_host=$my_ip + +# The internal IP address of the EC2 API server (string value) +#ec2_dmz_host=$my_ip + +# The port of the EC2 API server (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#ec2_port=8773 + +# The protocol to use when connecting to the EC2 API server (string value) +# Allowed values: http, https +#ec2_scheme=http + +# The path prefix used to call the ec2 API server (string value) +#ec2_path=/ + +# List of region=fqdn pairs separated by commas (list value) +#region_list = + +# Number of failed auths before lockout. (integer value) +#lockout_attempts=5 + +# Number of minutes to lockout if triggered. (integer value) +#lockout_minutes=15 + +# Number of minutes for lockout window. (integer value) +#lockout_window=15 + +# URL to get token from ec2 request. (string value) +#keystone_ec2_url=http://localhost:5000/v2.0/ec2tokens + +# Return the IP address as private dns hostname in describe instances (boolean +# value) +#ec2_private_dns_show_ip=false + +# Validate security group names according to EC2 specification (boolean value) +#ec2_strict_validation=true + +# Time in seconds before ec2 timestamp expires (integer value) +#ec2_timestamp_expiry=300 + +# Disable SSL certificate verification. (boolean value) +#keystone_ec2_insecure=false + +# List of metadata versions to skip placing into the config drive (string +# value) +#config_drive_skip_versions=1.0 2007-01-19 2007-03-01 2007-08-29 2007-10-10 2007-12-15 2008-02-01 2008-09-01 + +# Driver to use for vendor data (string value) +#vendordata_driver=nova.api.metadata.vendordata_json.JsonFileVendorData + +# Time in seconds to cache metadata; 0 to disable metadata caching entirely +# (not recommended). Increasing this should improve response times of the +# metadata API when under heavy load. Higher values may increase memory usage +# and result in longer times for host metadata changes to take effect.
(integer +# value) +#metadata_cache_expiration=15 + +# The maximum number of items returned in a single response from a collection +# resource (integer value) +#osapi_max_limit=1000 + +# Base URL that will be presented to users in links to the OpenStack Compute +# API (string value) +#osapi_compute_link_prefix=<None> + +# Base URL that will be presented to users in links to glance resources (string +# value) +#osapi_glance_link_prefix=<None> + +# DEPRECATED: Specify list of extensions to load when using +# osapi_compute_extension option with +# nova.api.openstack.compute.legacy_v2.contrib.select_extensions This option +# will be removed in the near future. After that point you have to run all of +# the API. (list value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#osapi_compute_ext_list = + +# Full path to fping. (string value) +#fping_path=/usr/sbin/fping +fping_path=/usr/sbin/fping + +# Enables or disables quota checking for tenant networks (boolean value) +#enable_network_quota=false + +# Control for checking for default networks (string value) +#use_neutron_default_nets=False + +# Default tenant id when creating neutron networks (string value) +#neutron_default_tenant_id=default + +# Number of private networks allowed per project (integer value) +#quota_networks=3 + +# osapi compute extension to load. This option will be removed in the near +# future. After that point you have to run all of the API. (multi valued) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#osapi_compute_extension=nova.api.openstack.compute.legacy_v2.contrib.standard_extensions + +# List of instance states that should hide network info (list value) +#osapi_hide_server_address_states=building + +# Enables returning of the instance password by the relevant server API calls +# such as create, rebuild or rescue, If the hypervisor does not support +# password injection then the password returned will not be correct (boolean +# value) +#enable_instance_password=true + +# +# From nova.compute +# + +# Allow destination machine to match source for resize. Useful when testing in +# single-host environments. (boolean value) +#allow_resize_to_same_host=false +allow_resize_to_same_host=False + +# Availability zone to use when user doesn't specify one (string value) +#default_schedule_zone=<None> + +# These are image properties which a snapshot should not inherit from an +# instance (list value) +#non_inheritable_image_properties=cache_in_nova,bittorrent + +# Kernel image that indicates not to use a kernel, but to use a raw disk image +# instead (string value) +#null_kernel=nokernel + +# When creating multiple instances with a single request using the os-multiple- +# create API extension, this template will be used to build the display name +# for each instance. The benefit is that the instances end up with different +# hostnames. To restore legacy behavior of every instance having the same name, +# set this option to "%(name)s". Valid keys for the template are: name, uuid, +# count. (string value) +#multi_instance_display_name_template=%(name)s-%(count)d + +# Maximum number of devices that will result in a local image being created on +# the hypervisor node. A negative number means unlimited. Setting +# max_local_block_devices to 0 means that any request that attempts to create a +# local disk will fail. 
This option is meant to limit the number of local disks +# (so root local disk that is the result of --image being used, and any other +# ephemeral and swap disks). 0 does not mean that images will be automatically +# converted to volumes and boot instances from volumes - it just means that all +# requests that attempt to create a local disk will fail. (integer value) +#max_local_block_devices=3 + +# Default flavor to use for the EC2 API only. The Nova API does not support a +# default flavor. (string value) +#default_flavor=m1.small + +# Console proxy host to use to connect to instances on this host. (string +# value) +#console_host=x86-017.build.eng.bos.redhat.com + +# Name of network to use to set access IPs for instances (string value) +#default_access_ip_network_name=<None> + +# Whether to batch up the application of IPTables rules during a host restart +# and apply all at the end of the init phase (boolean value) +#defer_iptables_apply=false + +# Where instances are stored on disk (string value) +#instances_path=$state_path/instances + +# Generate periodic compute.instance.exists notifications (boolean value) +#instance_usage_audit=false + +# Number of 1 second retries needed in live_migration (integer value) +#live_migration_retry_count=30 + +# Whether to start guests that were running before the host rebooted (boolean +# value) +#resume_guests_state_on_host_boot=false + +# Number of times to retry network allocation on failures (integer value) +#network_allocate_retries=0 + +# Maximum number of instance builds to run concurrently (integer value) +#max_concurrent_builds=10 + +# Maximum number of live migrations to run concurrently. This limit is enforced +# to avoid outbound live migrations overwhelming the host/network and causing +# failures. It is not recommended that you change this unless you are very sure +# that doing so is safe and stable in your environment. (integer value) +#max_concurrent_live_migrations=1 + +# Number of times to retry block device allocation on failures (integer value) +#block_device_allocate_retries=60 + +# The number of times to attempt to reap an instance's files. (integer value) +#maximum_instance_delete_attempts=5 + +# Interval to pull network bandwidth usage info. Not supported on all +# hypervisors. Set to -1 to disable. Setting this to 0 will run at the default +# rate. (integer value) +#bandwidth_poll_interval=600 + +# Interval to sync power states between the database and the hypervisor. Set to +# -1 to disable. Setting this to 0 will run at the default rate. (integer +# value) +#sync_power_state_interval=600 + +# Number of seconds between instance network information cache updates (integer +# value) +#heal_instance_info_cache_interval=60 +heal_instance_info_cache_interval=60 + +# Interval in seconds for reclaiming deleted instances (integer value) +#reclaim_instance_interval=0 + +# Interval in seconds for gathering volume usages (integer value) +#volume_usage_poll_interval=0 + +# Interval in seconds for polling shelved instances to offload. Set to -1 to +# disable. Setting this to 0 will run at the default rate. (integer value) +#shelved_poll_interval=3600 + +# Time in seconds before a shelved instance is eligible for removing from a +# host. -1 never offload, 0 offload immediately when shelved (integer value) +#shelved_offload_time=0 + +# Interval in seconds for retrying failed instance file deletes. Set to -1 to +# disable. Setting this to 0 will run at the default rate.
(integer value) +#instance_delete_interval=300 + +# Waiting time interval (seconds) between block device allocation retries on +# failures (integer value) +#block_device_allocate_retries_interval=3 + +# Waiting time interval (seconds) between sending the scheduler a list of +# current instance UUIDs to verify that its view of instances is in sync with +# nova. If the CONF option `scheduler_tracks_instance_changes` is False, +# changing this option will have no effect. (integer value) +#scheduler_instance_sync_interval=120 + +# Interval in seconds for updating compute resources. A number less than 0 +# means to disable the task completely. Leaving this at the default of 0 will +# cause this to run at the default periodic interval. Setting it to any +# positive value will cause it to run at approximately that number of seconds. +# (integer value) +#update_resources_interval=0 + +# Action to take if a running deleted instance is detected. Set to 'noop' to +# take no action. (string value) +# Allowed values: noop, log, shutdown, reap +#running_deleted_instance_action=reap + +# Number of seconds to wait between runs of the cleanup task. (integer value) +#running_deleted_instance_poll_interval=1800 + +# Number of seconds after being deleted when a running instance should be +# considered eligible for cleanup. (integer value) +#running_deleted_instance_timeout=0 + +# Automatically hard reboot an instance if it has been stuck in a rebooting +# state longer than N seconds. Set to 0 to disable. (integer value) +#reboot_timeout=0 + +# Amount of time in seconds an instance can be in BUILD before going into ERROR +# status. Set to 0 to disable. (integer value) +#instance_build_timeout=0 + +# Automatically unrescue an instance after N seconds. Set to 0 to disable. +# (integer value) +#rescue_timeout=0 + +# Automatically confirm resizes after N seconds. Set to 0 to disable. (integer +# value) +#resize_confirm_window=0 + +# Total amount of time to wait in seconds for an instance to perform a clean +# shutdown. (integer value) +#shutdown_timeout=60 + +# Monitor classes available to the compute which may be specified more than +# once. This option is DEPRECATED and no longer used. Use setuptools entry +# points to list available monitor plugins. (multi valued) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#compute_available_monitors = + +# A list of monitors that can be used for getting compute metrics. You can use +# the alias/name from the setuptools entry points for nova.compute.monitors.* +# namespaces. If no namespace is supplied, the "cpu." namespace is assumed for +# backwards-compatibility. An example value that would enable both the CPU and +# NUMA memory bandwidth monitors that used the virt driver variant: +# ["cpu.virt_driver", "numa_mem_bw.virt_driver"] (list value) +#compute_monitors = + +# Amount of disk in MB to reserve for the host (integer value) +#reserved_host_disk_mb=0 + +# Amount of memory in MB to reserve for the host (integer value) +#reserved_host_memory_mb=512 +reserved_host_memory_mb=512 + +# Class that will manage stats for the local compute host (string value) +#compute_stats_class=nova.compute.stats.Stats + +# The names of the extra resources to track. (list value) +#compute_resources=vcpu + +# Virtual CPU to physical CPU allocation ratio which affects all CPU filters. +# This configuration specifies a global ratio for CoreFilter.
For +# AggregateCoreFilter, it will fall back to this configuration value if no per- +# aggregate setting found. NOTE: This can be set per-compute, or if set to 0.0, +# the value set on the scheduler node(s) will be used and defaulted to 16.0 +# (floating point value) +#cpu_allocation_ratio=0.0 +cpu_allocation_ratio=16.0 + +# Virtual ram to physical ram allocation ratio which affects all ram filters. +# This configuration specifies a global ratio for RamFilter. For +# AggregateRamFilter, it will fall back to this configuration value if no per- +# aggregate setting found. NOTE: This can be set per-compute, or if set to 0.0, +# the value set on the scheduler node(s) will be used and defaulted to 1.5 +# (floating point value) +#ram_allocation_ratio=0.0 +ram_allocation_ratio=1.5 + +# The topic compute nodes listen on (string value) +#compute_topic=compute + +# +# From nova.network +# + +# The full class name of the network API class to use (string value) +#network_api_class=nova.network.api.API +network_api_class=nova.network.neutronv2.api.API + +# Driver to use for network creation (string value) +#network_driver=nova.network.linux_net + +# Default pool for floating IPs (string value) +#default_floating_pool=nova +default_floating_pool=public + +# Autoassigning floating IP to VM (boolean value) +#auto_assign_floating_ip=false + +# Full class name for the DNS Manager for floating IPs (string value) +#floating_ip_dns_manager=nova.network.noop_dns_driver.NoopDNSDriver + +# Full class name for the DNS Manager for instance IPs (string value) +#instance_dns_manager=nova.network.noop_dns_driver.NoopDNSDriver + +# Full class name for the DNS Zone for instance IPs (string value) +#instance_dns_domain = + +# URL for LDAP server which will store DNS entries (string value) +#ldap_dns_url=ldap://ldap.example.com:389 + +# User for LDAP DNS (string value) +#ldap_dns_user=uid=admin,ou=people,dc=example,dc=org + +# Password for LDAP DNS (string value) +#ldap_dns_password=password + +# Hostmaster for LDAP DNS driver Statement of Authority (string value) +#ldap_dns_soa_hostmaster=hostmaster@example.org + +# DNS Servers for LDAP DNS driver (multi valued) +#ldap_dns_servers=dns.example.org + +# Base DN for DNS entries in LDAP (string value) +#ldap_dns_base_dn=ou=hosts,dc=example,dc=org + +# Refresh interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_refresh=1800 + +# Retry interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_retry=3600 + +# Expiry interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_expiry=86400 + +# Minimum interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_minimum=7200 + +# Location of flagfiles for dhcpbridge (multi valued) +#dhcpbridge_flagfile=/etc/nova/nova.conf + +# Location to keep network config files (string value) +#networks_path=$state_path/networks + +# Interface for public IP addresses (string value) +#public_interface=eth0 + +# Location of nova-dhcpbridge (string value) +#dhcpbridge=/usr/bin/nova-dhcpbridge + +# Public IP of network host (string value) +#routing_source_ip=$my_ip + +# Lifetime of a DHCP lease in seconds (integer value) +#dhcp_lease_time=86400 + +# If set, uses specific DNS server for dnsmasq. Can be specified multiple +# times. (multi valued) +#dns_server = + +# If set, uses the dns1 and dns2 from the network ref. as dns servers. 
(boolean +# value) +#use_network_dns_servers=false + +# A list of dmz ranges that should be accepted (list value) +#dmz_cidr = + +# Traffic to this range will always be snatted to the fallback ip, even if it +# would normally be bridged out of the node. Can be specified multiple times. +# (multi valued) +#force_snat_range = +force_snat_range =0.0.0.0/0 + +# Override the default dnsmasq settings with this file (string value) +#dnsmasq_config_file = + +# Driver used to create ethernet devices. (string value) +#linuxnet_interface_driver=nova.network.linux_net.LinuxBridgeInterfaceDriver + +# Name of Open vSwitch bridge used with linuxnet (string value) +#linuxnet_ovs_integration_bridge=br-int + +# Send gratuitous ARPs for HA setup (boolean value) +#send_arp_for_ha=false + +# Send this many gratuitous ARPs for HA setup (integer value) +#send_arp_for_ha_count=3 + +# Use single default gateway. Only first nic of vm will get default gateway +# from dhcp server (boolean value) +#use_single_default_gateway=false + +# An interface that bridges can forward to. If this is set to all then all +# traffic will be forwarded. Can be specified multiple times. (multi valued) +#forward_bridge_interface=all + +# The IP address for the metadata API server (string value) +#metadata_host=$my_ip +metadata_host=VARINET4ADDR + +# The port for the metadata API port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#metadata_port=8775 + +# Regular expression to match the iptables rule that should always be on the +# top. (string value) +#iptables_top_regex = + +# Regular expression to match the iptables rule that should always be on the +# bottom. (string value) +#iptables_bottom_regex = + +# The table that iptables to jump to when a packet is to be dropped. (string +# value) +#iptables_drop_action=DROP + +# Amount of time, in seconds, that ovs_vsctl should wait for a response from +# the database. 0 is to wait forever. (integer value) +#ovs_vsctl_timeout=120 + +# If passed, use fake network devices and addresses (boolean value) +#fake_network=false + +# Number of times to retry ebtables commands on failure. (integer value) +#ebtables_exec_attempts=3 + +# Number of seconds to wait between ebtables retries. 
(floating point value) +#ebtables_retry_interval=1.0 + +# Bridge for simple network instances (string value) +#flat_network_bridge=<None> + +# DNS server for simple network (string value) +#flat_network_dns=8.8.4.4 + +# Whether to attempt to inject network setup into guest (boolean value) +#flat_injected=false + +# FlatDhcp will bridge into this interface if set (string value) +#flat_interface=<None> + +# First VLAN for private networks (integer value) +# Minimum value: 1 +# Maximum value: 4094 +#vlan_start=100 + +# VLANs will bridge into this interface if set (string value) +#vlan_interface=<None> + +# Number of networks to support (integer value) +#num_networks=1 + +# Public IP for the cloudpipe VPN servers (string value) +#vpn_ip=$my_ip + +# First Vpn port for private networks (integer value) +#vpn_start=1000 + +# Number of addresses in each private subnet (integer value) +#network_size=256 + +# Fixed IPv6 address block (string value) +#fixed_range_v6=fd00::/48 + +# Default IPv4 gateway (string value) +#gateway=<None> + +# Default IPv6 gateway (string value) +#gateway_v6=<None> + +# Number of addresses reserved for vpn clients (integer value) +#cnt_vpn_clients=0 + +# Seconds after which a deallocated IP is disassociated (integer value) +#fixed_ip_disassociate_timeout=600 + +# Number of attempts to create unique mac address (integer value) +#create_unique_mac_address_attempts=5 + +# If True, skip using the queue and make local calls (boolean value) +#fake_call=false + +# If True, unused gateway devices (VLAN and bridge) are deleted in VLAN network +# mode with multi hosted networks (boolean value) +#teardown_unused_network_gateway=false + +# If True, send a dhcp release on instance termination (boolean value) +#force_dhcp_release=True + +# If True, when a DNS entry must be updated, it sends a fanout cast to all +# network hosts to update their DNS entries in multi host mode (boolean value) +#update_dns_entries=false + +# Number of seconds to wait between runs of updates to DNS entries. (integer +# value) +#dns_update_periodic_interval=-1 + +# Domain to use for building the hostnames (string value) +#dhcp_domain=novalocal +dhcp_domain=novalocal + +# Indicates underlying L3 management library (string value) +#l3_lib=nova.network.l3.LinuxNetL3 + +# The topic network nodes listen on (string value) +#network_topic=network + +# Default value for multi_host in networks. Also, if set, some rpc network +# calls will be sent directly to host. (boolean value) +#multi_host=false + +# The full class name of the security API class (string value) +#security_group_api=nova +security_group_api=neutron + +# +# From nova.openstack.common.memorycache +# + +# Memcached servers or None for in process cache. (list value) +#memcached_servers=<None> + +# +# From nova.openstack.common.policy +# + +# The JSON file that defines policies. (string value) +#policy_file=policy.json + +# Default rule. Enforced when a requested rule is not found. (string value) +#policy_default_rule=default + +# Directories where policy configuration files are stored. They can be relative +# to any directory in the search path defined by the config_dir option, or +# absolute paths. The file defined by policy_file must exist for these +# directories to be searched. Missing or empty directories are ignored. 
(multi +# valued) +#policy_dirs=policy.d + +# +# From nova.scheduler +# + +# Virtual disk to physical disk allocation ratio (floating point value) +#disk_allocation_ratio=1.0 + +# Tells filters to ignore hosts that have this many or more instances currently +# in build, resize, snapshot, migrate, rescue or unshelve task states (integer +# value) +#max_io_ops_per_host=8 + +# Ignore hosts that have too many instances (integer value) +#max_instances_per_host=50 + +# Absolute path to scheduler configuration JSON file. (string value) +#scheduler_json_config_location = + +# The scheduler host manager class to use (string value) +#scheduler_host_manager=nova.scheduler.host_manager.HostManager + +# New instances will be scheduled on a host chosen randomly from a subset of +# the N best hosts. This property defines the subset size that a host is chosen +# from. A value of 1 chooses the first host returned by the weighing functions. +# This value must be at least 1. Any value less than 1 will be ignored, and 1 +# will be used instead (integer value) +#scheduler_host_subset_size=1 + +# Force the filter to consider only keys matching the given namespace. (string +# value) +#aggregate_image_properties_isolation_namespace=<None> + +# The separator used between the namespace and keys (string value) +#aggregate_image_properties_isolation_separator=. + +# Images to run on isolated host (list value) +#isolated_images = + +# Host reserved for specific images (list value) +#isolated_hosts = + +# Whether to force isolated hosts to run only isolated images (boolean value) +#restrict_isolated_hosts_to_isolated_images=true + +# Filter classes available to the scheduler which may be specified more than +# once. An entry of "nova.scheduler.filters.all_filters" maps to all filters +# included with nova. (multi valued) +#scheduler_available_filters=nova.scheduler.filters.all_filters + +# Which filter class names to use for filtering hosts when not specified in the +# request. (list value) +#scheduler_default_filters=RetryFilter,AvailabilityZoneFilter,RamFilter,DiskFilter,ComputeFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,ServerGroupAntiAffinityFilter,ServerGroupAffinityFilter +scheduler_default_filters=RetryFilter,AvailabilityZoneFilter,RamFilter,ComputeFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,CoreFilter + +# Which weight class names to use for weighing hosts (list value) +#scheduler_weight_classes=nova.scheduler.weights.all_weighers + +# Determines if the Scheduler tracks changes to instances to help with its +# filtering decisions. (boolean value) +#scheduler_tracks_instance_changes=true + +# Which filter class names to use for filtering baremetal hosts when not +# specified in the request. (list value) +#baremetal_scheduler_default_filters=RetryFilter,AvailabilityZoneFilter,ComputeFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,ExactRamFilter,ExactDiskFilter,ExactCoreFilter + +# Flag to decide whether to use baremetal_scheduler_default_filters or not. +# (boolean value) +#scheduler_use_baremetal_filters=false + +# Default driver to use for the scheduler (string value) +#scheduler_driver=nova.scheduler.filter_scheduler.FilterScheduler +scheduler_driver=nova.scheduler.filter_scheduler.FilterScheduler + +# How often (in seconds) to run periodic tasks in the scheduler driver of your +# choice. Please note this is likely to interact with the value of +# service_down_time, but exactly how they interact will depend on your choice +# of scheduler driver. 
(integer value) +#scheduler_driver_task_period=60 + +# The topic scheduler nodes listen on (string value) +#scheduler_topic=scheduler + +# Maximum number of attempts to schedule an instance (integer value) +#scheduler_max_attempts=3 + +# Multiplier used for weighing host io ops. Negative numbers mean a preference +# to choose light workload compute hosts. (floating point value) +#io_ops_weight_multiplier=-1.0 + +# Multiplier used for weighing ram. Negative numbers mean to stack vs spread. +# (floating point value) +#ram_weight_multiplier=1.0 + +# +# From nova.virt +# + +# Config drive format. (string value) +# Allowed values: iso9660, vfat +#config_drive_format=iso9660 + +# Set to "always" to force injection to take place on a config drive. NOTE: The +# "always" will be deprecated in the Liberty release cycle. (string value) +# Allowed values: always, True, False +#force_config_drive=<None> + +# Name and optionally path of the tool used for ISO image creation (string +# value) +#mkisofs_cmd=genisoimage + +# Name of the mkfs commands for ephemeral device. The format is <os_type>=<mkfs +# command> (multi valued) +#virt_mkfs = + +# Attempt to resize the filesystem by accessing the image over a block device. +# This is done by the host and may not be necessary if the image contains a +# recent version of cloud-init. Possible mechanisms require the nbd driver (for +# qcow and raw), or loop (for raw). (boolean value) +#resize_fs_using_block_device=false + +# Amount of time, in seconds, to wait for NBD device start up. (integer value) +#timeout_nbd=10 + +# Driver to use for controlling virtualization. Options include: +# libvirt.LibvirtDriver, xenapi.XenAPIDriver, fake.FakeDriver, +# ironic.IronicDriver, vmwareapi.VMwareVCDriver, hyperv.HyperVDriver (string +# value) +#compute_driver=libvirt.LibvirtDriver +compute_driver=libvirt.LibvirtDriver + +# The default format an ephemeral_volume will be formatted with on creation. +# (string value) +#default_ephemeral_format=<None> + +# VM image preallocation mode: "none" => no storage provisioning is done up +# front, "space" => storage is fully allocated at instance start (string value) +# Allowed values: none, space +#preallocate_images=none + +# Whether to use cow images (boolean value) +#use_cow_images=true + +# Fail instance boot if vif plugging fails (boolean value) +#vif_plugging_is_fatal=true +vif_plugging_is_fatal=True + +# Number of seconds to wait for neutron vif plugging events to arrive before +# continuing or failing (see vif_plugging_is_fatal). If this is set to zero and +# vif_plugging_is_fatal is False, events should not be expected to arrive at +# all. (integer value) +#vif_plugging_timeout=300 +vif_plugging_timeout=300 + +# Firewall driver (defaults to hypervisor specific iptables driver) (string +# value) +#firewall_driver=nova.virt.libvirt.firewall.IptablesFirewallDriver +firewall_driver=nova.virt.firewall.NoopFirewallDriver + +# Whether to allow network traffic from same network (boolean value) +#allow_same_net_traffic=true + +# Defines which pcpus that instance vcpus can use. For example, "4-12,^8,15" +# (string value) +#vcpu_pin_set=<None> + +# Number of seconds to wait between runs of the image cache manager. Set to -1 +# to disable. Setting this to 0 will run at the default rate. (integer value) +#image_cache_manager_interval=2400 + +# Where cached images are stored under $instances_path. This is NOT the full +# path - just a folder name. 
For per-compute-host cached images, set to +# _base_$my_ip (string value) +#image_cache_subdirectory_name=_base + +# Should unused base images be removed? (boolean value) +#remove_unused_base_images=true + +# Unused unresized base images younger than this will not be removed (integer +# value) +#remove_unused_original_minimum_age_seconds=86400 + +# Force backing images to raw format (boolean value) +#force_raw_images=true +force_raw_images=True + +# Template file for injected network (string value) +#injected_network_template=/usr/share/nova/interfaces.template + +# +# From oslo.log +# + +# Print debugging output (set logging level to DEBUG instead of default INFO +# level). (boolean value) +#debug=false +debug=True + +# If set to false, will disable INFO logging level, making WARNING the default. +# (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#verbose=true +verbose=True + +# The name of a logging configuration file. This file is appended to any +# existing logging configuration files. For details about logging configuration +# files, see the Python logging module documentation. (string value) +# Deprecated group;name - DEFAULT;log_config +#log_config_append=<None> + +# DEPRECATED. A logging.Formatter log message format string which may use any +# of the available logging.LogRecord attributes. This option is deprecated. +# Please use logging_context_format_string and logging_default_format_string +# instead. (string value) +#log_format=<None> + +# Format string for %%(asctime)s in log records. Default: %(default)s . (string +# value) +#log_date_format=%Y-%m-%d %H:%M:%S + +# (Optional) Name of log file to output to. If no default is set, logging will +# go to stdout. (string value) +# Deprecated group;name - DEFAULT;logfile +#log_file=<None> + +# (Optional) The base directory used for relative --log-file paths. (string +# value) +# Deprecated group;name - DEFAULT;logdir +#log_dir=/var/log/nova +log_dir=/var/log/nova + +# Use syslog for logging. Existing syslog format is DEPRECATED and will be +# changed later to honor RFC5424. (boolean value) +#use_syslog=false +use_syslog=False + +# (Optional) Enables or disables syslog rfc5424 format for logging. If enabled, +# prefixes the MSG part of the syslog message with APP-NAME (RFC5424). The +# format without the APP-NAME is deprecated in Kilo, and will be removed in +# Mitaka, along with this option. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#use_syslog_rfc_format=true + +# Syslog facility to receive log lines. (string value) +#syslog_log_facility=LOG_USER +syslog_log_facility=LOG_USER + +# Log output to standard error. (boolean value) +#use_stderr=False +use_stderr=True + +# Format string to use for log messages with context. (string value) +#logging_context_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [%(request_id)s %(user_identity)s] %(instance)s%(message)s + +# Format string to use for log messages without context. (string value) +#logging_default_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s + +# Data to append to log format when level is DEBUG. (string value) +#logging_debug_format_suffix=%(funcName)s %(pathname)s:%(lineno)d + +# Prefix each line of exception output with this format. (string value) +#logging_exception_prefix=%(asctime)s.%(msecs)03d %(process)d ERROR %(name)s %(instance)s + +# List of logger=LEVEL pairs. 
(list value) +#default_log_levels=amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN + +# Enables or disables publication of error events. (boolean value) +#publish_errors=false + +# The format for an instance that is passed with the log message. (string +# value) +#instance_format="[instance: %(uuid)s] " + +# The format for an instance UUID that is passed with the log message. (string +# value) +#instance_uuid_format="[instance: %(uuid)s] " + +# Enables or disables fatal status of deprecations. (boolean value) +#fatal_deprecations=false + +# +# From oslo.messaging +# + +# Size of RPC connection pool. (integer value) +# Deprecated group;name - DEFAULT;rpc_conn_pool_size +#rpc_conn_pool_size=30 + +# ZeroMQ bind address. Should be a wildcard (*), an ethernet interface, or IP. +# The "host" option should point or resolve to this address. (string value) +#rpc_zmq_bind_address=* + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker=local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port=9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts=1 + +# Maximum number of ingress messages to locally buffer per topic. Default is +# unlimited. (integer value) +#rpc_zmq_topic_backlog=<None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir=/var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. Must match +# "host" option, if running Nova. (string value) +#rpc_zmq_host=localhost + +# Seconds to wait before a cast expires (TTL). Only supported by impl_zmq. +# (integer value) +#rpc_cast_timeout=30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq=300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl=600 + +# Size of executor thread pool. (integer value) +# Deprecated group;name - DEFAULT;rpc_thread_pool_size +#executor_thread_pool_size=64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = +notification_driver =nova.openstack.common.notifier.rabbit_notifier,ceilometer.compute.nova_notifier + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group;name - [rpc_notifier2]/topics +#notification_topics=notifications +notification_topics=notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout=60 + +# A URL representing the messaging driver to use and its full configuration. If +# not set, we fall back to the rpc_backend option and driver specific +# configuration. (string value) +#transport_url=<None> + +# The messaging driver to use, defaults to rabbit. Other drivers include qpid +# and zmq. (string value) +#rpc_backend=rabbit +rpc_backend=rabbit + +# The default exchange under which topics are scoped. May be overridden by an +# exchange name specified in the transport_url option. (string value) +#control_exchange=openstack + +# +# From oslo.service.periodic_task +# + +# Some periodic tasks can be run in a separate process. Should we run them +# here? (boolean value) +#run_external_periodic_tasks=true + +# +# From oslo.service.service +# + +# Enable eventlet backdoor. 
Acceptable values are 0, <port>, and +# <start>:<end>, where 0 results in listening on a random tcp port number; +# <port> results in listening on the specified port number (and not enabling +# backdoor if that port is in use); and <start>:<end> results in listening on +# the smallest unused port number within the specified range of port numbers. +# The chosen port is displayed in the service's log file. (string value) +#backdoor_port=<None> + +# Enables or disables logging values of all registered options when starting a +# service (at DEBUG level). (boolean value) +#log_options=true +sql_connection=mysql+pymysql://nova:qum5net@VARINET4ADDR/nova +image_service=nova.image.glance.GlanceImageService +lock_path=/var/lib/nova/tmp +osapi_volume_listen=0.0.0.0 +vncserver_proxyclient_address=VARHOSTNAME.ceph.redhat.com +vnc_keymap=en-us +vnc_enabled=True +vncserver_listen=0.0.0.0 +novncproxy_base_url=http://VARINET4ADDR:6080/vnc_auto.html + +rbd_user = cinder +rbd_secret_uuid = RBDSECRET + +[api_database] + +# +# From nova +# + +# The SQLAlchemy connection string to use to connect to the Nova API database. +# (string value) +#connection=mysql://nova:nova@localhost/nova + +# If True, SQLite uses synchronous mode. (boolean value) +#sqlite_synchronous=true + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +#idle_timeout=3600 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +#max_retries=-1 + +# Interval between retries of opening a SQL connection. (integer value) +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +#pool_timeout=<None> + + +[barbican] + +# +# From nova +# + +# Info to match when looking for barbican in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> +# (string value) +#catalog_info=key-manager:barbican:public + +# Override service catalog lookup with template for barbican endpoint e.g. +# http://localhost:9311/v1/%(project_id)s (string value) +#endpoint_template=<None> + +# Region name of this node (string value) +#os_region_name=<None> + + +[cells] + +# +# From nova.cells +# + +# Enable cell functionality (boolean value) +#enable=false + +# The topic cells nodes listen on (string value) +#topic=cells + +# Manager for cells (string value) +#manager=nova.cells.manager.CellsManager + +# Name of this cell (string value) +#name=nova + +# Key/Multi-value list with the capabilities of the cell (list value) +#capabilities=hypervisor=xenserver;kvm,os=linux;windows + +# Seconds to wait for response from a call to a cell. 
(integer value) +#call_timeout=60 + +# Percentage of cell capacity to hold in reserve. Affects both memory and disk +# utilization (floating point value) +#reserve_percent=10.0 + +# Type of cell (string value) +# Allowed values: api, compute +#cell_type=compute + +# Number of seconds after which a lack of capability and capacity updates +# signals the child cell is to be treated as a mute. (integer value) +#mute_child_interval=300 + +# Seconds between bandwidth updates for cells. (integer value) +#bandwidth_update_interval=600 + +# Cells communication driver to use (string value) +#driver=nova.cells.rpc_driver.CellsRPCDriver + +# Number of seconds after an instance was updated or deleted to continue to +# update cells (integer value) +#instance_updated_at_threshold=3600 + +# Number of instances to update per periodic task run (integer value) +#instance_update_num_instances=1 + +# Maximum number of hops for cells routing. (integer value) +#max_hop_count=10 + +# Cells scheduler to use (string value) +#scheduler=nova.cells.scheduler.CellsScheduler + +# Base queue name to use when communicating between cells. Various topics by +# message type will be appended to this. (string value) +#rpc_driver_queue_base=cells.intercell + +# Filter classes the cells scheduler should use. An entry of +# "nova.cells.filters.all_filters" maps to all cells filters included with +# nova. (list value) +#scheduler_filter_classes=nova.cells.filters.all_filters + +# Weigher classes the cells scheduler should use. An entry of +# "nova.cells.weights.all_weighers" maps to all cell weighers included with +# nova. (list value) +#scheduler_weight_classes=nova.cells.weights.all_weighers + +# How many retries when no cells are available. (integer value) +#scheduler_retries=10 + +# How often to retry in seconds when no cells are available. (integer value) +#scheduler_retry_delay=2 + +# Interval, in seconds, for getting fresh cell information from the database. +# (integer value) +#db_check_interval=60 + +# Configuration file from which to read cells configuration. If given, +# overrides reading cells from the database. (string value) +#cells_config=<None> + +# Multiplier used to weigh mute children. (The value should be negative.) +# (floating point value) +#mute_weight_multiplier=-10000.0 + +# Multiplier used for weighing ram. Negative numbers mean to stack vs spread. +# (floating point value) +#ram_weight_multiplier=10.0 + +# Multiplier used to weigh offset weigher. (floating point value) +#offset_weight_multiplier=1.0 + + +[cinder] + +# +# From nova +# + +# Info to match when looking for cinder in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> +# (string value) +#catalog_info=volumev2:cinderv2:publicURL +catalog_info=volumev2:cinderv2:publicURL + +# Override service catalog lookup with template for cinder endpoint e.g. +# http://localhost:8776/v1/%(project_id)s (string value) +#endpoint_template=<None> + +# Region name of this node (string value) +#os_region_name=<None> + +# Number of cinderclient retries on failed http calls (integer value) +#http_retries=3 + +# Allow attach between instance and volume in different availability zones. 
+# (boolean value) +#cross_az_attach=true + + +[conductor] + +# +# From nova +# + +# Perform nova-conductor operations locally (boolean value) +#use_local=false +use_local=False + +# The topic on which conductor nodes listen (string value) +#topic=conductor + +# Full class name for the Manager for conductor (string value) +#manager=nova.conductor.manager.ConductorManager + +# Number of workers for OpenStack Conductor service. The default will be the +# number of CPUs available. (integer value) +#workers=<None> + + +[cors] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. (string value) +#allowed_origin=<None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials=true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age=3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods=GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[cors.subdomain] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. (string value) +#allowed_origin=<None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials=true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age=3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods=GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[database] + +# +# From nova +# + +# The file name to use with SQLite. (string value) +# Deprecated group;name - DEFAULT;sqlite_db +#sqlite_db=oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group;name - DEFAULT;sqlite_synchronous +#sqlite_synchronous=true + +# The back end to use for the database. (string value) +# Deprecated group;name - DEFAULT;db_backend +#backend=sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. (string +# value) +# Deprecated group;name - DEFAULT;sql_connection +# Deprecated group;name - [DATABASE]/sql_connection +# Deprecated group;name - [sql]/connection +#connection=<None> + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. 
(integer value) +# Deprecated group;name - DEFAULT;sql_idle_timeout +# Deprecated group;name - [DATABASE]/sql_idle_timeout +# Deprecated group;name - [sql]/idle_timeout +#idle_timeout=3600 + +# Minimum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_min_pool_size +# Deprecated group;name - [DATABASE]/sql_min_pool_size +#min_pool_size=1 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_max_pool_size +# Deprecated group;name - [DATABASE]/sql_max_pool_size +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +# Deprecated group;name - DEFAULT;sql_max_retries +# Deprecated group;name - [DATABASE]/sql_max_retries +#max_retries=10 + +# Interval between retries of opening a SQL connection. (integer value) +# Deprecated group;name - DEFAULT;sql_retry_interval +# Deprecated group;name - [DATABASE]/reconnect_interval +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +# Deprecated group;name - DEFAULT;sql_max_overflow +# Deprecated group;name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +# Deprecated group;name - DEFAULT;sql_connection_debug +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +# Deprecated group;name - DEFAULT;sql_connection_trace +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +# Deprecated group;name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout=<None> + +# Enable the experimental use of database reconnect on connection lost. +# (boolean value) +#use_db_reconnect=false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval=1 + +# If True, increases the interval between retries of a database operation up to +# db_max_retry_interval. (boolean value) +#db_inc_retry_interval=true + +# If db_inc_retry_interval is set, the maximum seconds between retries of a +# database operation. (integer value) +#db_max_retry_interval=10 + +# Maximum retries in case of connection error or deadlock error before error is +# raised. Set to -1 to specify an infinite retry count. (integer value) +#db_max_retries=20 + +# +# From oslo.db +# + +# The file name to use with SQLite. (string value) +# Deprecated group;name - DEFAULT;sqlite_db +#sqlite_db=oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group;name - DEFAULT;sqlite_synchronous +#sqlite_synchronous=true + +# The back end to use for the database. (string value) +# Deprecated group;name - DEFAULT;db_backend +#backend=sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. (string +# value) +# Deprecated group;name - DEFAULT;sql_connection +# Deprecated group;name - [DATABASE]/sql_connection +# Deprecated group;name - [sql]/connection +#connection=<None> + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. 
Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +# Deprecated group;name - DEFAULT;sql_idle_timeout +# Deprecated group;name - [DATABASE]/sql_idle_timeout +# Deprecated group;name - [sql]/idle_timeout +#idle_timeout=3600 + +# Minimum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_min_pool_size +# Deprecated group;name - [DATABASE]/sql_min_pool_size +#min_pool_size=1 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_max_pool_size +# Deprecated group;name - [DATABASE]/sql_max_pool_size +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +# Deprecated group;name - DEFAULT;sql_max_retries +# Deprecated group;name - [DATABASE]/sql_max_retries +#max_retries=10 + +# Interval between retries of opening a SQL connection. (integer value) +# Deprecated group;name - DEFAULT;sql_retry_interval +# Deprecated group;name - [DATABASE]/reconnect_interval +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +# Deprecated group;name - DEFAULT;sql_max_overflow +# Deprecated group;name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +# Deprecated group;name - DEFAULT;sql_connection_debug +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +# Deprecated group;name - DEFAULT;sql_connection_trace +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +# Deprecated group;name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout=<None> + +# Enable the experimental use of database reconnect on connection lost. +# (boolean value) +#use_db_reconnect=false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval=1 + +# If True, increases the interval between retries of a database operation up to +# db_max_retry_interval. (boolean value) +#db_inc_retry_interval=true + +# If db_inc_retry_interval is set, the maximum seconds between retries of a +# database operation. (integer value) +#db_max_retry_interval=10 + +# Maximum retries in case of connection error or deadlock error before error is +# raised. Set to -1 to specify an infinite retry count. (integer value) +#db_max_retries=20 + + +[ephemeral_storage_encryption] + +# +# From nova.compute +# + +# Whether to encrypt ephemeral storage (boolean value) +#enabled=false + +# The cipher and mode to be used to encrypt ephemeral storage. Which ciphers +# are available ciphers depends on kernel support. See /proc/crypto for the +# list of available options. (string value) +#cipher=aes-xts-plain64 + +# The bit length of the encryption key to be used to encrypt ephemeral storage +# (in XTS mode only half of the bits are used for encryption key) (integer +# value) +#key_size=512 + + +[glance] + +# +# From nova +# + +# Default glance hostname or IP address (string value) +#host=$my_ip + +# Default glance port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#port=9292 + +# Default protocol to use when connecting to glance. Set to https for SSL. +# (string value) +# Allowed values: http, https +#protocol=http + +# A list of the glance api servers available to nova. 
Prefix with https:// for +# ssl-based glance api servers. ([hostname|ip]:port) (list value) +#api_servers=<None> +api_servers=VARINET4ADDR:9292 + +# Allow to perform insecure SSL (https) requests to glance (boolean value) +#api_insecure=false + +# Number of retries when uploading / downloading an image to / from glance. +# (integer value) +#num_retries=0 + +# A list of url scheme that can be downloaded directly via the direct_url. +# Currently supported schemes: [file]. (list value) +#allowed_direct_url_schemes = + + +[guestfs] + +# +# From nova.virt +# + +# Enable guestfs debug (boolean value) +#debug=false + + +[hyperv] + +# +# From nova.virt +# + +# The name of a Windows share name mapped to the "instances_path" dir and used +# by the resize feature to copy files to the target host. If left blank, an +# administrative share will be used, looking for the same "instances_path" used +# locally (string value) +#instances_path_share = + +# Force V1 WMI utility classes (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#force_hyperv_utils_v1=false + +# Force V1 volume utility class (boolean value) +#force_volumeutils_v1=false + +# External virtual switch Name, if not provided, the first external virtual +# switch is used (string value) +#vswitch_name=<None> + +# Required for live migration among hosts with different CPU features (boolean +# value) +#limit_cpu_features=false + +# Sets the admin password in the config drive image (boolean value) +#config_drive_inject_password=false + +# Path of qemu-img command which is used to convert between different image +# types (string value) +#qemu_img_cmd=qemu-img.exe + +# Attaches the Config Drive image as a cdrom drive instead of a disk drive +# (boolean value) +#config_drive_cdrom=false + +# Enables metrics collections for an instance by using Hyper-V's metric APIs. +# Collected data can by retrieved by other apps and services, e.g.: Ceilometer. +# Requires Hyper-V / Windows Server 2012 and above (boolean value) +#enable_instance_metrics_collection=false + +# Enables dynamic memory allocation (ballooning) when set to a value greater +# than 1. The value expresses the ratio between the total RAM assigned to an +# instance and its startup RAM amount. For example a ratio of 2.0 for an +# instance with 1024MB of RAM implies 512MB of RAM allocated at startup +# (floating point value) +#dynamic_memory_ratio=1.0 + +# Number of seconds to wait for instance to shut down after soft reboot request +# is made. We fall back to hard reboot if instance does not shutdown within +# this window. (integer value) +#wait_soft_reboot_seconds=60 + +# The number of times to retry to attach a volume (integer value) +#volume_attach_retry_count=10 + +# Interval between volume attachment attempts, in seconds (integer value) +#volume_attach_retry_interval=5 + +# The number of times to retry checking for a disk mounted via iSCSI. (integer +# value) +#mounted_disk_query_retry_count=10 + +# Interval between checks for a mounted iSCSI disk, in seconds. (integer value) +#mounted_disk_query_retry_interval=5 + + +[image_file_url] + +# +# From nova +# + +# List of file systems that are configured in this file in the +# image_file_url:<list entry name> sections (list value) +#filesystems = + + +[ironic] + +# +# From nova.virt +# + +# Version of Ironic API service endpoint. (integer value) +#api_version=1 + +# URL for Ironic API endpoint. 
(string value) +#api_endpoint=<None> + +# Ironic keystone admin name (string value) +#admin_username=<None> + +# Ironic keystone admin password. (string value) +#admin_password=<None> + +# Ironic keystone auth token.DEPRECATED: use admin_username, admin_password, +# and admin_tenant_name instead (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_auth_token=<None> + +# Keystone public API endpoint. (string value) +#admin_url=<None> + +# Log level override for ironicclient. Set this in order to override the global +# "default_log_levels", "verbose", and "debug" settings. DEPRECATED: use +# standard logging configuration. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#client_log_level=<None> + +# Ironic keystone tenant name. (string value) +#admin_tenant_name=<None> + +# How many retries when a request does conflict. If <= 0, only try once, no +# retries. (integer value) +#api_max_retries=60 + +# How often to retry in seconds when a request does conflict (integer value) +#api_retry_interval=2 + + +[keymgr] + +# +# From nova +# + +# Fixed key returned by key manager, specified in hex (string value) +#fixed_key=<None> + +# The full class name of the key manager API class (string value) +#api_class=nova.keymgr.conf_key_mgr.ConfKeyManager + + +[keystone_authtoken] + +# +# From keystonemiddleware.auth_token +# + +# Complete public Identity API endpoint. (string value) +#auth_uri=<None> +auth_uri=http://VARINET4ADDR:5000/v2.0 + +# API version of the admin Identity API endpoint. (string value) +#auth_version=<None> + +# Do not handle authorization requests within the middleware, but delegate the +# authorization decision to downstream WSGI components. (boolean value) +#delay_auth_decision=false + +# Request timeout value for communicating with Identity API server. (integer +# value) +#http_connect_timeout=<None> + +# How many times are we trying to reconnect when communicating with Identity +# API Server. (integer value) +#http_request_max_retries=3 + +# Env key for the swift cache. (string value) +#cache=<None> + +# Required if identity server requires client certificate (string value) +#certfile=<None> + +# Required if identity server requires client certificate (string value) +#keyfile=<None> + +# A PEM encoded Certificate Authority to use when verifying HTTPs connections. +# Defaults to system CAs. (string value) +#cafile=<None> + +# Verify HTTPS connections. (boolean value) +#insecure=false + +# The region in which the identity server can be found. (string value) +#region_name=<None> + +# Directory used to cache files related to PKI tokens. (string value) +#signing_dir=<None> + +# Optionally specify a list of memcached server(s) to use for caching. If left +# undefined, tokens will instead be cached in-process. (list value) +# Deprecated group;name - DEFAULT;memcache_servers +#memcached_servers=<None> + +# In order to prevent excessive effort spent validating tokens, the middleware +# caches previously-seen tokens for a configurable duration (in seconds). Set +# to -1 to disable caching completely. (integer value) +#token_cache_time=300 + +# Determines the frequency at which the list of revoked tokens is retrieved +# from the Identity service (in seconds). A high number of revocation events +# combined with a low cache duration may significantly reduce performance. 
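+# As an illustration of the caching options above (hostnames are placeholders),
+# tokens can be cached in an external memcached pool instead of in-process:
+#   memcached_servers=192.0.2.21:11211,192.0.2.22:11211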
+# (integer value) +#revocation_cache_time=10 + +# (Optional) If defined, indicate whether token data should be authenticated or +# authenticated and encrypted. Acceptable values are MAC or ENCRYPT. If MAC, +# token data is authenticated (with HMAC) in the cache. If ENCRYPT, token data +# is encrypted and authenticated in the cache. If the value is not one of these +# options or empty, auth_token will raise an exception on initialization. +# (string value) +#memcache_security_strategy=<None> + +# (Optional, mandatory if memcache_security_strategy is defined) This string is +# used for key derivation. (string value) +#memcache_secret_key=<None> + +# (Optional) Number of seconds memcached server is considered dead before it is +# tried again. (integer value) +#memcache_pool_dead_retry=300 + +# (Optional) Maximum total number of open connections to every memcached +# server. (integer value) +#memcache_pool_maxsize=10 + +# (Optional) Socket timeout in seconds for communicating with a memcached +# server. (integer value) +#memcache_pool_socket_timeout=3 + +# (Optional) Number of seconds a connection to memcached is held unused in the +# pool before it is closed. (integer value) +#memcache_pool_unused_timeout=60 + +# (Optional) Number of seconds that an operation will wait to get a memcached +# client connection from the pool. (integer value) +#memcache_pool_conn_get_timeout=10 + +# (Optional) Use the advanced (eventlet safe) memcached client pool. The +# advanced pool will only work under python 2.x. (boolean value) +#memcache_use_advanced_pool=false + +# (Optional) Indicate whether to set the X-Service-Catalog header. If False, +# middleware will not ask for service catalog on token validation and will not +# set the X-Service-Catalog header. (boolean value) +#include_service_catalog=true + +# Used to control the use and type of token binding. Can be set to: "disabled" +# to not check token binding. "permissive" (default) to validate binding +# information if the bind type is of a form known to the server and ignore it +# if not. "strict" like "permissive" but if the bind type is unknown the token +# will be rejected. "required" any form of token binding is needed to be +# allowed. Finally the name of a binding method that must be present in tokens. +# (string value) +#enforce_token_bind=permissive + +# If true, the revocation list will be checked for cached tokens. This requires +# that PKI tokens are configured on the identity server. (boolean value) +#check_revocations_for_cached=false + +# Hash algorithms to use for hashing PKI tokens. This may be a single algorithm +# or multiple. The algorithms are those supported by Python standard +# hashlib.new(). The hashes will be tried in the order given, so put the +# preferred one first for performance. The result of the first hash will be +# stored in the cache. This will typically be set to multiple values only while +# migrating from a less secure algorithm to a more secure one. Once all the old +# tokens are expired this option should be set to a single value for better +# performance. (list value) +#hash_algorithms=md5 + +# Prefix to prepend at the beginning of the path. Deprecated, use identity_uri. +# (string value) +#auth_admin_prefix = + +# Host providing the admin Identity API endpoint. Deprecated, use identity_uri. +# (string value) +#auth_host=127.0.0.1 + +# Port of the admin Identity API endpoint. Deprecated, use identity_uri. +# (integer value) +#auth_port=35357 + +# Protocol of the admin Identity API endpoint (http or https). 
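+# For example (the address is a placeholder), the deprecated trio
+#   auth_protocol=http, auth_host=192.0.2.10, auth_port=35357
+# is expressed with the single identity_uri option as
+#   identity_uri=http://192.0.2.10:35357/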
Deprecated, use +# identity_uri. (string value) +#auth_protocol=http + +# Complete admin Identity API endpoint. This should specify the unversioned +# root endpoint e.g. https://localhost:35357/ (string value) +#identity_uri=<None> +identity_uri=http://VARINET4ADDR:35357 + +# This option is deprecated and may be removed in a future release. Single +# shared secret with the Keystone configuration used for bootstrapping a +# Keystone installation, or otherwise bypassing the normal authentication +# process. This option should not be used, use `admin_user` and +# `admin_password` instead. (string value) +#admin_token=<None> + +# Service username. (string value) +#admin_user=<None> +admin_user=nova + +# Service user password. (string value) +#admin_password=<None> +admin_password=qum5net + +# Service tenant name. (string value) +#admin_tenant_name=admin +admin_tenant_name=services + + +[libvirt] + +# +# From nova.virt +# + +# Rescue ami image. This will not be used if an image id is provided by the +# user. (string value) +#rescue_image_id=<None> + +# Rescue aki image (string value) +#rescue_kernel_id=<None> + +# Rescue ari image (string value) +#rescue_ramdisk_id=<None> + +# Libvirt domain type (string value) +# Allowed values: kvm, lxc, qemu, uml, xen, parallels +#virt_type=kvm +virt_type=kvm + +# Override the default libvirt URI (which is dependent on virt_type) (string +# value) +#connection_uri = + +# Inject the admin password at boot time, without an agent. (boolean value) +#inject_password=false +inject_password=False + +# Inject the ssh public key at boot time (boolean value) +#inject_key=false +inject_key=False + +# The partition to inject to : -2 => disable, -1 => inspect (libguestfs only), +# 0 => not partitioned, >0 => partition number (integer value) +#inject_partition=-2 +inject_partition=-2 + +# Sync virtual and real mouse cursors in Windows VMs (boolean value) +#use_usb_tablet=true + +# Migration target URI (any included "%s" is replaced with the migration target +# hostname) (string value) +#live_migration_uri=qemu+tcp://%s/system +live_migration_uri=qemu+tcp://nova@%s/system + +# Migration flags to be set for live migration (string value) +#live_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE, VIR_MIGRATE_PEER2PEER, VIR_MIGRATE_LIVE, VIR_MIGRATE_TUNNELLED +live_migration_flag="VIR_MIGRATE_UNDEFINE_SOURCE, VIR_MIGRATE_PEER2PEER, VIR_MIGRATE_LIVE, VIR_MIGRATE_PERSIST_DEST, VIR_MIGRATE_TUNNELLED" + +# Migration flags to be set for block migration (string value) +#block_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE, VIR_MIGRATE_PEER2PEER, VIR_MIGRATE_LIVE, VIR_MIGRATE_TUNNELLED, VIR_MIGRATE_NON_SHARED_INC + +# Maximum bandwidth(in MiB/s) to be used during migration. If set to 0, will +# choose a suitable default. Some hypervisors do not support this feature and +# will return an error if bandwidth is not 0. Please refer to the libvirt +# documentation for further details (integer value) +#live_migration_bandwidth=0 + +# Maximum permitted downtime, in milliseconds, for live migration switchover. +# Will be rounded up to a minimum of 100ms. Use a large value if guest liveness +# is unimportant. (integer value) +#live_migration_downtime=500 + +# Number of incremental steps to reach max downtime value. Will be rounded up +# to a minimum of 3 steps (integer value) +#live_migration_downtime_steps=10 + +# Time to wait, in seconds, between each step increase of the migration +# downtime. Minimum delay is 10 seconds. 
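+# Rough worked example (figures are illustrative and the sizing rule is only
+# sketched): with live_migration_downtime=500 and live_migration_downtime_steps=10
+# the permitted pause grows in ten increments toward 500 ms, and for a guest with
+# 4 GiB of RAM plus an 8 GiB disk the per-step delay described below works out to
+# roughly (4 + 8) * 75 = 900 seconds.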
Value is per GiB of guest RAM + disk +# to be transferred, with lower bound of a minimum of 2 GiB per device (integer +# value) +#live_migration_downtime_delay=75 + +# Time to wait, in seconds, for migration to successfully complete transferring +# data before aborting the operation. Value is per GiB of guest RAM + disk to +# be transferred, with lower bound of a minimum of 2 GiB. Should usually be +# larger than downtime delay * downtime steps. Set to 0 to disable timeouts. +# (integer value) +#live_migration_completion_timeout=800 + +# Time to wait, in seconds, for migration to make forward progress in +# transferring data before aborting the operation. Set to 0 to disable +# timeouts. (integer value) +#live_migration_progress_timeout=150 + +# Snapshot image format. Defaults to same as source image (string value) +# Allowed values: raw, qcow2, vmdk, vdi +#snapshot_image_format=<None> + +# Override the default disk prefix for the devices attached to a server, which +# is dependent on virt_type. (valid options are: sd, xvd, uvd, vd) (string +# value) +#disk_prefix=<None> + +# Number of seconds to wait for instance to shut down after soft reboot request +# is made. We fall back to hard reboot if instance does not shutdown within +# this window. (integer value) +#wait_soft_reboot_seconds=120 + +# Set to "host-model" to clone the host CPU feature flags; to "host- +# passthrough" to use the host CPU model exactly; to "custom" to use a named +# CPU model; to "none" to not set any CPU model. If virt_type="kvm|qemu", it +# will default to "host-model", otherwise it will default to "none" (string +# value) +# Allowed values: host-model, host-passthrough, custom, none +#cpu_mode=<None> +cpu_mode=host-model + +# Set to a named libvirt CPU model (see names listed in +# /usr/share/libvirt/cpu_map.xml). Only has effect if cpu_mode="custom" and +# virt_type="kvm|qemu" (string value) +#cpu_model=<None> + +# Location where libvirt driver will store snapshots before uploading them to +# image service (string value) +#snapshots_directory=$instances_path/snapshots + +# Location where the Xen hvmloader is kept (string value) +#xen_hvmloader_path=/usr/lib/xen/boot/hvmloader + +# Specific cachemodes to use for different disk types e.g: +# file=directsync,block=none (list value) +#disk_cachemodes = +disk_cachemodes="network=writeback" + +# A path to a device that will be used as source of entropy on the host. +# Permitted options are: /dev/random or /dev/hwrng (string value) +#rng_dev_path=<None> + +# For qemu or KVM guests, set this option to specify a default machine type per +# host architecture. You can find a list of supported machine types in your +# environment by checking the output of the "virsh capabilities"command. The +# format of the value for this config option is host-arch=machine-type. For +# example: x86_64=machinetype1,armv7l=machinetype2 (list value) +#hw_machine_type=<None> + +# The data source used to the populate the host "serial" UUID exposed to guest +# in the virtual BIOS. (string value) +# Allowed values: none, os, hardware, auto +#sysinfo_serial=auto + +# A number of seconds to memory usage statistics period. Zero or negative value +# mean to disable memory usage statistics. (integer value) +#mem_stats_period_seconds=10 + +# List of uid targets and ranges.Syntax is guest-uid:host-uid:countMaximum of 5 +# allowed. (list value) +#uid_maps = + +# List of guid targets and ranges.Syntax is guest-gid:host-gid:countMaximum of +# 5 allowed. 
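+# Example using the guest:host:count syntax described above (the ids are
+# arbitrary placeholders):
+#   uid_maps=0:1000:1,1:10001:999
+#   gid_maps=0:1000:1,1:10001:999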
(list value) +#gid_maps = + +# In a realtime host context vCPUs for guest will run in that scheduling +# priority. Priority depends on the host kernel (usually 1-99) (integer value) +#realtime_scheduler_priority=1 + +# VM Images format. If default is specified, then use_cow_images flag is used +# instead of this one. (string value) +# Allowed values: raw, qcow2, lvm, rbd, ploop, default +#images_type=default +images_type=rbd + +# LVM Volume Group that is used for VM images, when you specify +# images_type=lvm. (string value) +#images_volume_group=<None> + +# Create sparse logical volumes (with virtualsize) if this flag is set to True. +# (boolean value) +#sparse_logical_volumes=false + +# The RADOS pool in which rbd volumes are stored (string value) +#images_rbd_pool=rbd +images_rbd_pool=vms + +# Path to the ceph configuration file to use (string value) +#images_rbd_ceph_conf = +images_rbd_ceph_conf = /etc/ceph/ceph.conf +rbd_user = cinder +rbd_secret_uuid = RBDSECRET + +# Discard option for nova managed disks. Need Libvirt(1.0.6) Qemu1.5 (raw +# format) Qemu1.6(qcow2 format) (string value) +# Allowed values: ignore, unmap +#hw_disk_discard=<None> +hw_disk_discard=unmap + +# Allows image information files to be stored in non-standard locations (string +# value) +#image_info_filename_pattern=$instances_path/$image_cache_subdirectory_name/%(image)s.info + +# DEPRECATED: Should unused kernel images be removed? This is only safe to +# enable if all compute nodes have been updated to support this option (running +# Grizzly or newer level compute). This will be the default behavior in the +# 13.0.0 release. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#remove_unused_kernels=true + +# Unused resized base images younger than this will not be removed (integer +# value) +#remove_unused_resized_minimum_age_seconds=3600 + +# Write a checksum for files in _base to disk (boolean value) +#checksum_base_images=false + +# How frequently to checksum base images (integer value) +#checksum_interval_seconds=3600 + +# Method used to wipe old volumes. (string value) +# Allowed values: none, zero, shred +#volume_clear=zero + +# Size in MiB to wipe at start of old volumes. 0 => all (integer value) +#volume_clear_size=0 + +# Compress snapshot images when possible. This currently applies exclusively to +# qcow2 images (boolean value) +#snapshot_compression=false + +# Use virtio for bridge interfaces with KVM/QEMU (boolean value) +#use_virtio_for_bridges=true + +# Protocols listed here will be accessed directly from QEMU. Currently +# supported protocols: [gluster] (list value) +#qemu_allowed_storage_drivers = +vif_driver=nova.virt.libvirt.vif.LibvirtGenericVIFDriver + + +[matchmaker_redis] + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host=127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port=6379 + +# Password for Redis server (optional). (string value) +#password=<None> + + +[matchmaker_ring] + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group;name - DEFAULT;matchmaker_ringfile +#ringfile=/etc/oslo/matchmaker_ring.json + + +[metrics] + +# +# From nova.scheduler +# + +# Multiplier used for weighing metrics. (floating point value) +#weight_multiplier=1.0 + +# How the metrics are going to be weighed. 
This should be in the form of +# "<name1>=<ratio1>, <name2>=<ratio2>, ...", where <nameX> is one of the +# metrics to be weighed, and <ratioX> is the corresponding ratio. So for +# "name1=1.0, name2=-1.0" The final weight would be name1.value * 1.0 + +# name2.value * -1.0. (list value) +#weight_setting = + +# How to treat the unavailable metrics. When a metric is NOT available for a +# host, if it is set to be True, it would raise an exception, so it is +# recommended to use the scheduler filter MetricFilter to filter out those +# hosts. If it is set to be False, the unavailable metric would be treated as a +# negative factor in weighing process, the returned value would be set by the +# option weight_of_unavailable. (boolean value) +#required=true + +# The final weight value to be returned if required is set to False and any one +# of the metrics set by weight_setting is unavailable. (floating point value) +#weight_of_unavailable=-10000.0 + + +[neutron] + +# +# From nova.api +# + +# Set flag to indicate Neutron will proxy metadata requests and resolve +# instance ids. (boolean value) +#service_metadata_proxy=false +service_metadata_proxy=True + +# Shared secret to validate proxies Neutron metadata requests (string value) +#metadata_proxy_shared_secret = +metadata_proxy_shared_secret =qum5net + +# +# From nova.network +# + +# URL for connecting to neutron (string value) +#url=http://127.0.0.1:9696 +url=http://VARINET4ADDR:9696 + +# User id for connecting to neutron in admin context. DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_user_id=<None> + +# Username for connecting to neutron in admin context DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_username=<None> +admin_username=neutron + +# Password for connecting to neutron in admin context DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_password=<None> +admin_password=qum5net + +# Tenant id for connecting to neutron in admin context DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_tenant_id=<None> + +# Tenant name for connecting to neutron in admin context. This option will be +# ignored if neutron_admin_tenant_id is set. Note that with Keystone V3 tenant +# names are only unique within a domain. DEPRECATED: specify an auth_plugin and +# appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_tenant_name=<None> +admin_tenant_name=services + +# Region name for connecting to neutron in admin context (string value) +#region_name=<None> +region_name=RegionOne + +# Authorization URL for connecting to neutron in admin context. DEPRECATED: +# specify an auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_auth_url=http://localhost:5000/v2.0 +admin_auth_url=http://VARINET4ADDR:5000/v2.0 + +# Authorization strategy for connecting to neutron in admin context. 
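+# The non-deprecated style replaces the admin_* settings above with an auth
+# plugin; a sketch with placeholder values (the plugin name assumes the generic
+# keystone password plugin) might look like
+#   auth_plugin=password
+#   auth_url=http://192.0.2.10:5000/v2.0
+#   username=neutron
+#   password=secret
+#   tenant_name=services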
+# DEPRECATED: specify an auth_plugin and appropriate credentials instead. If an +# auth_plugin is specified strategy will be ignored. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#auth_strategy=keystone +auth_strategy=keystone + +# Name of Integration Bridge used by Open vSwitch (string value) +#ovs_bridge=br-int +ovs_bridge=br-int + +# Number of seconds before querying neutron for extensions (integer value) +#extension_sync_interval=600 +extension_sync_interval=600 + +# +# From nova.network.neutronv2 +# + +# Authentication URL (string value) +#auth_url=<None> + +# Name of the plugin to load (string value) +#auth_plugin=<None> + +# PEM encoded Certificate Authority to use when verifying HTTPs connections. +# (string value) +# Deprecated group;name - [neutron]/ca_certificates_file +#cafile=<None> + +# PEM encoded client certificate cert file (string value) +#certfile=<None> + +# Domain ID to scope to (string value) +#domain_id=<None> + +# Domain name to scope to (string value) +#domain_name=<None> + +# Verify HTTPS connections. (boolean value) +# Deprecated group;name - [neutron]/api_insecure +#insecure=false + +# PEM encoded client certificate key file (string value) +#keyfile=<None> + +# User's password (string value) +#password=<None> + +# Domain ID containing project (string value) +#project_domain_id=<None> + +# Domain name containing project (string value) +#project_domain_name=<None> + +# Project ID to scope to (string value) +#project_id=<None> + +# Project name to scope to (string value) +#project_name=<None> + +# Tenant ID to scope to (string value) +#tenant_id=<None> + +# Tenant name to scope to (string value) +#tenant_name=<None> + +# Timeout value for http requests (integer value) +# Deprecated group;name - [neutron]/url_timeout +#timeout=<None> +timeout=30 + +# Trust ID (string value) +#trust_id=<None> + +# User's domain id (string value) +#user_domain_id=<None> + +# User's domain name (string value) +#user_domain_name=<None> + +# User id (string value) +#user_id=<None> + +# Username (string value) +# Deprecated group;name - DEFAULT;username +#username=<None> +default_tenant_id=default + + +[osapi_v21] + +# +# From nova.api +# + +# DEPRECATED: Whether the V2.1 API is enabled or not. This option will be +# removed in the near future. (boolean value) +# Deprecated group;name - [osapi_v21]/enabled +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#enabled=true + +# DEPRECATED: A list of v2.1 API extensions to never load. Specify the +# extension aliases here. This option will be removed in the near future. After +# that point you have to run all of the API. (list value) +# Deprecated group;name - [osapi_v21]/extensions_blacklist +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#extensions_blacklist = + +# DEPRECATED: If the list is not empty then a v2.1 API extension will only be +# loaded if it exists in this list. Specify the extension aliases here. This +# option will be removed in the near future. After that point you have to run +# all of the API. (list value) +# Deprecated group;name - [osapi_v21]/extensions_whitelist +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#extensions_whitelist = + + +[oslo_concurrency] + +# +# From oslo.concurrency +# + +# Enables or disables inter-process locks. 
(boolean value) +# Deprecated group;name - DEFAULT;disable_process_locking +#disable_process_locking=false + +# Directory to use for lock files. For security, the specified directory +# should only be writable by the user running the processes that need locking. +# Defaults to environment variable OSLO_LOCK_PATH. If external locks are used, +# a lock path must be set. (string value) +# Deprecated group;name - DEFAULT;lock_path +#lock_path=/var/lib/nova/tmp + + +[oslo_messaging_amqp] + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group;name - [amqp1]/server_request_prefix +#server_request_prefix=exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group;name - [amqp1]/broadcast_prefix +#broadcast_prefix=broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group;name - [amqp1]/group_request_prefix +#group_request_prefix=unicast + +# Name for the AMQP container (string value) +# Deprecated group;name - [amqp1]/container_name +#container_name=<None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group;name - [amqp1]/idle_timeout +#idle_timeout=0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group;name - [amqp1]/trace +#trace=false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group;name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string value) +# Deprecated group;name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string value) +# Deprecated group;name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group;name - [amqp1]/ssl_key_password +#ssl_key_password=<None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group;name - [amqp1]/allow_insecure_clients +#allow_insecure_clients=false + + +[oslo_messaging_qpid] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_durable_queues +# Deprecated group;name - DEFAULT;rabbit_durable_queues +#amqp_durable_queues=false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply=false + +# Qpid broker hostname. (string value) +# Deprecated group;name - DEFAULT;qpid_hostname +#qpid_hostname=localhost + +# Qpid broker port. (integer value) +# Deprecated group;name - DEFAULT;qpid_port +#qpid_port=5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group;name - DEFAULT;qpid_hosts +#qpid_hosts=$qpid_hostname:$qpid_port + +# Username for Qpid connection. 
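+# Example of a two-broker HA pair expressed with the qpid_hosts list above
+# (hostnames are placeholders):
+#   qpid_hosts=qpid-a.example.com:5672,qpid-b.example.com:5672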
(string value) +# Deprecated group;name - DEFAULT;qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group;name - DEFAULT;qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string value) +# Deprecated group;name - DEFAULT;qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. (integer value) +# Deprecated group;name - DEFAULT;qpid_heartbeat +#qpid_heartbeat=60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group;name - DEFAULT;qpid_protocol +#qpid_protocol=tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group;name - DEFAULT;qpid_tcp_nodelay +#qpid_tcp_nodelay=true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group;name - DEFAULT;qpid_receiver_capacity +#qpid_receiver_capacity=1 + +# The qpid topology version to use. Version 1 is what was originally used by +# impl_qpid. Version 2 includes some backwards-incompatible changes that allow +# broker federation to work. Users should update to version 2 when they are +# able to take everything down, as it requires a clean break. (integer value) +# Deprecated group;name - DEFAULT;qpid_topology_version +#qpid_topology_version=1 + + +[oslo_messaging_rabbit] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_durable_queues +# Deprecated group;name - DEFAULT;rabbit_durable_queues +#amqp_durable_queues=false +amqp_durable_queues=False + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply=false + +# SSL version to use (valid only if SSL enabled). Valid values are TLSv1 and +# SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be available on some +# distributions. (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_keyfile +#kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_certfile +#kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_ca_certs +#kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer cancel +# notification. (floating point value) +# Deprecated group;name - DEFAULT;kombu_reconnect_delay +#kombu_reconnect_delay=1.0 +kombu_reconnect_delay=1.0 + +# How long to wait before considering a reconnect attempt to have failed. This +# value should not be longer than rpc_response_timeout. 
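+# Sketch of an SSL-enabled broker connection using the options above (paths are
+# illustrative placeholders):
+#   rabbit_use_ssl=True
+#   kombu_ssl_version=TLSv1
+#   kombu_ssl_keyfile=/etc/pki/tls/private/rabbit.key
+#   kombu_ssl_certfile=/etc/pki/tls/certs/rabbit.crt
+#   kombu_ssl_ca_certs=/etc/pki/tls/certs/ca.crt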
(integer value) +#kombu_reconnect_timeout=60 + +# Determines how the next RabbitMQ node is chosen in case the one we are +# currently connected to becomes unavailable. Takes effect only if more than +# one RabbitMQ node is provided in config. (string value) +# Allowed values: round-robin, shuffle +#kombu_failover_strategy=round-robin + +# The RabbitMQ broker address where a single node is used. (string value) +# Deprecated group;name - DEFAULT;rabbit_host +#rabbit_host=localhost +rabbit_host=VARINET4ADDR + +# The RabbitMQ broker port where a single node is used. (integer value) +# Deprecated group;name - DEFAULT;rabbit_port +#rabbit_port=5672 +rabbit_port=5672 + +# RabbitMQ HA cluster host:port pairs. (list value) +# Deprecated group;name - DEFAULT;rabbit_hosts +#rabbit_hosts=$rabbit_host:$rabbit_port +rabbit_hosts=VARINET4ADDR:5672 + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group;name - DEFAULT;rabbit_use_ssl +#rabbit_use_ssl=false +rabbit_use_ssl=False + +# The RabbitMQ userid. (string value) +# Deprecated group;name - DEFAULT;rabbit_userid +#rabbit_userid=guest +rabbit_userid=guest + +# The RabbitMQ password. (string value) +# Deprecated group;name - DEFAULT;rabbit_password +#rabbit_password=guest +rabbit_password=guest + +# The RabbitMQ login method. (string value) +# Deprecated group;name - DEFAULT;rabbit_login_method +#rabbit_login_method=AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group;name - DEFAULT;rabbit_virtual_host +#rabbit_virtual_host=/ +rabbit_virtual_host=/ + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval=1 + +# How long to backoff for between retries when connecting to RabbitMQ. (integer +# value) +# Deprecated group;name - DEFAULT;rabbit_retry_backoff +#rabbit_retry_backoff=2 + +# Maximum number of RabbitMQ connection retries. Default is 0 (infinite retry +# count). (integer value) +# Deprecated group;name - DEFAULT;rabbit_max_retries +#rabbit_max_retries=0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this option, you +# must wipe the RabbitMQ database. (boolean value) +# Deprecated group;name - DEFAULT;rabbit_ha_queues +#rabbit_ha_queues=false +rabbit_ha_queues=False + +# Specifies the number of messages to prefetch. Setting to zero allows +# unlimited messages. (integer value) +#rabbit_qos_prefetch_count=0 + +# Number of seconds after which the Rabbit broker is considered down if +# heartbeat's keep-alive fails (0 disable the heartbeat). EXPERIMENTAL (integer +# value) +#heartbeat_timeout_threshold=60 +heartbeat_timeout_threshold=0 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate=2 +heartbeat_rate=2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake (boolean value) +# Deprecated group;name - DEFAULT;fake_rabbit +#fake_rabbit=false + + +[oslo_middleware] + +# +# From oslo.middleware +# + +# The maximum body size for each request, in bytes. (integer value) +# Deprecated group;name - DEFAULT;osapi_max_request_body_size +# Deprecated group;name - DEFAULT;max_request_body_size +#max_request_body_size=114688 + +# +# From oslo.middleware +# + +# The HTTP Header that will be used to determine what the original request +# protocol scheme was, even if it was hidden by an SSL termination proxy. 
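+# For example, a TLS-terminating proxy in front of the API usually injects
+#   X-Forwarded-Proto: https
+# into each request, which is what the header named below is matched against.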
+# (string value) +#secure_proxy_ssl_header=X-Forwarded-Proto + + +[rdp] + +# +# From nova +# + +# Location of RDP html5 console proxy, in the form "http://127.0.0.1:6083/" +# (string value) +#html5_proxy_base_url=http://127.0.0.1:6083/ + +# Enable RDP related features (boolean value) +#enabled=false + + +[serial_console] + +# +# From nova +# + +# Host on which to listen for incoming requests (string value) +#serialproxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#serialproxy_port=6083 + +# Enable serial console related features (boolean value) +#enabled=false + +# Range of TCP ports to use for serial ports on compute hosts (string value) +#port_range=10000:20000 + +# Location of serial console proxy. (string value) +#base_url=ws://127.0.0.1:6083/ + +# IP address on which instance serial console should listen (string value) +#listen=127.0.0.1 + +# The address to which proxy clients (like nova-serialproxy) should connect +# (string value) +#proxyclient_address=127.0.0.1 + + +[spice] + +# +# From nova +# + +# Host on which to listen for incoming requests (string value) +#html5proxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#html5proxy_port=6082 + +# Location of spice HTML5 console proxy, in the form +# "http://127.0.0.1:6082/spice_auto.html" (string value) +#html5proxy_base_url=http://127.0.0.1:6082/spice_auto.html + +# IP address on which instance spice server should listen (string value) +#server_listen=127.0.0.1 + +# The address to which proxy clients (like nova-spicehtml5proxy) should connect +# (string value) +#server_proxyclient_address=127.0.0.1 + +# Enable spice related features (boolean value) +#enabled=false + +# Enable spice guest agent support (boolean value) +#agent_enabled=true + +# Keymap for spice (string value) +#keymap=en-us + + +[ssl] + +# +# From oslo.service.sslutils +# + +# CA certificate file to use to verify connecting clients. (string value) +#ca_file=<None> + +# Certificate file to use when starting the server securely. (string value) +#cert_file=<None> + +# Private key file to use when starting the server securely. 
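+# Illustrative layout for serving over TLS (the paths are placeholders):
+#   cert_file=/etc/pki/tls/certs/nova.crt
+#   key_file=/etc/pki/tls/private/nova.key
+#   ca_file=/etc/pki/tls/certs/clients-ca.crt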
(string value) +#key_file=<None> + + +[trusted_computing] + +# +# From nova.scheduler +# + +# Attestation server HTTP (string value) +#attestation_server=<None> + +# Attestation server Cert file for Identity verification (string value) +#attestation_server_ca_file=<None> + +# Attestation server port (string value) +#attestation_port=8443 + +# Attestation web API URL (string value) +#attestation_api_url=/OpenAttestationWebServices/V1.0 + +# Attestation authorization blob - must change (string value) +#attestation_auth_blob=<None> + +# Attestation status cache valid period length (integer value) +#attestation_auth_timeout=60 + +# Disable SSL cert verification for Attestation service (boolean value) +#attestation_insecure_ssl=false + + +[upgrade_levels] + +# +# From nova +# + +# Set a version cap for messages sent to the base api in any service (string +# value) +#baseapi=<None> + +# Set a version cap for messages sent to cert services (string value) +#cert=<None> + +# Set a version cap for messages sent to conductor services (string value) +#conductor=<None> + +# Set a version cap for messages sent to console services (string value) +#console=<None> + +# Set a version cap for messages sent to consoleauth services (string value) +#consoleauth=<None> + +# +# From nova.cells +# + +# Set a version cap for messages sent between cells services (string value) +#intercell=<None> + +# Set a version cap for messages sent to local cells services (string value) +#cells=<None> + +# +# From nova.compute +# + +# Set a version cap for messages sent to compute services. If you plan to do a +# live upgrade from an old version to a newer version, you should set this +# option to the old version before beginning the live upgrade procedure. Only +# upgrading to the next version is supported, so you cannot skip a release for +# the live upgrade procedure. (string value) +#compute=<None> + +# +# From nova.network +# + +# Set a version cap for messages sent to network services (string value) +#network=<None> + +# +# From nova.scheduler +# + +# Set a version cap for messages sent to scheduler services (string value) +#scheduler=<None> + + +[vmware] + +# +# From nova.virt +# + +# The maximum number of ObjectContent data objects that should be returned in a +# single result. A positive value will cause the operation to suspend the +# retrieval when the count of objects reaches the specified maximum. The server +# may still limit the count to something less than the configured value. Any +# remaining objects may be retrieved with additional requests. (integer value) +#maximum_objects=100 + +# The PBM status. (boolean value) +#pbm_enabled=false + +# PBM service WSDL file location URL. e.g. +# file:///opt/SDK/spbm/wsdl/pbmService.wsdl Not setting this will disable +# storage policy based placement of instances. (string value) +#pbm_wsdl_location=<None> + +# The PBM default policy. If pbm_wsdl_location is set and there is no defined +# storage policy for the specific request then this policy will be used. +# (string value) +#pbm_default_policy=<None> + +# Hostname or IP address for connection to VMware vCenter host. (string value) +#host_ip=<None> + +# Port for connection to VMware vCenter host. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#host_port=443 + +# Username for connection to VMware vCenter host. (string value) +#host_username=<None> + +# Password for connection to VMware vCenter host. 
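+# Example vCenter connection (all values are placeholders):
+#   host_ip=192.0.2.40
+#   host_username=administrator@vsphere.local
+#   host_password=secret
+#   cluster_name=Cluster1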
(string value) +#host_password=<None> + +# Specify a CA bundle file to use in verifying the vCenter server certificate. +# (string value) +#ca_file=<None> + +# If true, the vCenter server certificate is not verified. If false, then the +# default CA truststore is used for verification. This option is ignored if +# "ca_file" is set. (boolean value) +#insecure=false + +# Name of a VMware Cluster ComputeResource. (string value) +#cluster_name=<None> + +# Regex to match the name of a datastore. (string value) +#datastore_regex=<None> + +# The interval used for polling of remote tasks. (floating point value) +#task_poll_interval=0.5 + +# The number of times we retry on failures, e.g., socket error, etc. (integer +# value) +#api_retry_count=10 + +# VNC starting port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#vnc_port=5900 + +# Total number of VNC ports (integer value) +#vnc_port_total=10000 + +# Whether to use linked clone (boolean value) +#use_linked_clone=true + +# Optional VIM Service WSDL Location e.g http://<server>/vimService.wsdl. +# Optional over-ride to default location for bug work-arounds (string value) +#wsdl_location=<None> + +# Physical ethernet adapter name for vlan networking (string value) +#vlan_interface=vmnic0 + +# Name of Integration Bridge (string value) +#integration_bridge=br-int + +# Set this value if affected by an increased network latency causing repeated +# characters when typing in a remote console. (integer value) +#console_delay_seconds=<None> + +# Identifies the remote system that serial port traffic will be sent to. If +# this is not set, no serial ports will be added to the created VMs. (string +# value) +#serial_port_service_uri=<None> + +# Identifies a proxy service that provides network access to the +# serial_port_service_uri. This option is ignored if serial_port_service_uri is +# not specified. (string value) +#serial_port_proxy_uri=<None> + +# The prefix for where cached images are stored. This is NOT the full path - +# just a folder prefix. This should only be used when a datastore cache should +# be shared between compute nodes. Note: this should only be used when the +# compute nodes have a shared file system. (string value) +#cache_prefix=<None> + + +[vnc] + +# +# From nova +# + +# Location of VNC console proxy, in the form +# "http://127.0.0.1:6080/vnc_auto.html" (string value) +# Deprecated group;name - DEFAULT;novncproxy_base_url +#novncproxy_base_url=http://127.0.0.1:6080/vnc_auto.html + +# Location of nova xvp VNC console proxy, in the form +# "http://127.0.0.1:6081/console" (string value) +# Deprecated group;name - DEFAULT;xvpvncproxy_base_url +#xvpvncproxy_base_url=http://127.0.0.1:6081/console + +# IP address on which instance vncservers should listen (string value) +# Deprecated group;name - DEFAULT;vncserver_listen +#vncserver_listen=127.0.0.1 + +# The address to which proxy clients (like nova-xvpvncproxy) should connect +# (string value) +# Deprecated group;name - DEFAULT;vncserver_proxyclient_address +#vncserver_proxyclient_address=127.0.0.1 + +# Enable VNC related features (boolean value) +# Deprecated group;name - DEFAULT;vnc_enabled +#enabled=true + +# Keymap for VNC (string value) +# Deprecated group;name - DEFAULT;vnc_keymap +#keymap=en-us + + +[workarounds] + +# +# From nova +# + +# This option allows a fallback to sudo for performance reasons. 
For example +# see https://bugs.launchpad.net/nova/+bug/1415106 (boolean value) +#disable_rootwrap=false + +# When using libvirt 1.2.2 live snapshots fail intermittently under load. This +# config option provides a mechanism to enable live snapshot while this is +# resolved. See https://bugs.launchpad.net/nova/+bug/1334398 (boolean value) +#disable_libvirt_livesnapshot=true + +# DEPRECATED: Whether to destroy instances on startup when we suspect they have +# previously been evacuated. This can result in data loss if undesired. See +# https://launchpad.net/bugs/1419785 (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#destroy_after_evacuate=true + +# Whether or not to handle events raised from the compute driver's 'emit_event' +# method. These are lifecycle events raised from compute drivers that implement +# the method. An example of a lifecycle event is an instance starting or +# stopping. If the instance is going through task state changes due to an API +# operation, like resize, the events are ignored. However, this is an advanced +# feature which allows the hypervisor to signal to the compute service that an +# unexpected state change has occurred in an instance and the instance can be +# shutdown automatically - which can inherently race in reboot operations or +# when the compute service or host is rebooted, either planned or due to an +# unexpected outage. Care should be taken when using this and +# sync_power_state_interval is negative since then if any instances are out of +# sync between the hypervisor and the Nova database they will have to be +# synchronized manually. See https://bugs.launchpad.net/bugs/1444630 (boolean +# value) +#handle_virt_lifecycle_events=true + + +[xenserver] + +# +# From nova.virt +# + +# Name of Integration Bridge used by Open vSwitch (string value) +#ovs_integration_bridge=xapi1 + +# Number of seconds to wait for agent reply (integer value) +#agent_timeout=30 + +# Number of seconds to wait for agent to be fully operational (integer value) +#agent_version_timeout=300 + +# Number of seconds to wait for agent reply to resetnetwork request (integer +# value) +#agent_resetnetwork_timeout=60 + +# Specifies the path in which the XenAPI guest agent should be located. If the +# agent is present, network configuration is not injected into the image. Used +# if compute_driver=xenapi.XenAPIDriver and flat_injected=True (string value) +#agent_path=usr/sbin/xe-update-networking + +# Disables the use of the XenAPI agent in any image regardless of what image +# properties are present. (boolean value) +#disable_agent=false + +# Determines if the XenAPI agent should be used when the image used does not +# contain a hint to declare if the agent is present or not. The hint is a +# glance property "xenapi_use_agent" that has the value "True" or "False". Note +# that waiting for the agent when it is not present will significantly increase +# server boot times. (boolean value) +#use_agent_default=false + +# Timeout in seconds for XenAPI login. (integer value) +#login_timeout=10 + +# Maximum number of concurrent XenAPI connections. Used only if +# compute_driver=xenapi.XenAPIDriver (integer value) +#connection_concurrent=5 + +# URL for connection to XenServer/Xen Cloud Platform. A special value of +# unix://local can be used to connect to the local unix socket. Required if +# compute_driver=xenapi.XenAPIDriver (string value) +#connection_url=<None> + +# Username for connection to XenServer/Xen Cloud Platform. 
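+# Example connection settings (the host and password are placeholders):
+#   connection_url=http://xenserver1.example.com
+#   connection_username=root
+#   connection_password=secret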
Used only if +# compute_driver=xenapi.XenAPIDriver (string value) +#connection_username=root + +# Password for connection to XenServer/Xen Cloud Platform. Used only if +# compute_driver=xenapi.XenAPIDriver (string value) +#connection_password=<None> + +# The interval used for polling of coalescing vhds. Used only if +# compute_driver=xenapi.XenAPIDriver (floating point value) +#vhd_coalesce_poll_interval=5.0 + +# Ensure compute service is running on host XenAPI connects to. (boolean value) +#check_host=true + +# Max number of times to poll for VHD to coalesce. Used only if +# compute_driver=xenapi.XenAPIDriver (integer value) +#vhd_coalesce_max_attempts=20 + +# Base path to the storage repository (string value) +#sr_base_path=/var/run/sr-mount + +# The iSCSI Target Host (string value) +#target_host=<None> + +# The iSCSI Target Port, default is port 3260 (string value) +#target_port=3260 + +# IQN Prefix (string value) +#iqn_prefix=iqn.2010-10.org.openstack + +# Used to enable the remapping of VBD dev (Works around an issue in Ubuntu +# Maverick) (boolean value) +#remap_vbd_dev=false + +# Specify prefix to remap VBD dev to (ex. /dev/xvdb -> /dev/sdb) (string value) +#remap_vbd_dev_prefix=sd + +# Base URL for torrent files; must contain a slash character (see RFC 1808, +# step 6) (string value) +#torrent_base_url=<None> + +# Probability that peer will become a seeder. (1.0 = 100%) (floating point +# value) +#torrent_seed_chance=1.0 + +# Number of seconds after downloading an image via BitTorrent that it should be +# seeded for other peers. (integer value) +#torrent_seed_duration=3600 + +# Cached torrent files not accessed within this number of seconds can be reaped +# (integer value) +#torrent_max_last_accessed=86400 + +# Beginning of port range to listen on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#torrent_listen_port_start=6881 + +# End of port range to listen on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#torrent_listen_port_end=6891 + +# Number of seconds a download can remain at the same progress percentage w/o +# being considered a stall (integer value) +#torrent_download_stall_cutoff=600 + +# Maximum number of seeder processes to run concurrently within a given dom0. +# (-1 = no limit) (integer value) +#torrent_max_seeder_processes_per_host=1 + +# To use for hosts with different CPUs (boolean value) +#use_join_force=true + +# Cache glance images locally. `all` will cache all images, `some` will only +# cache images that have the image_property `cache_in_nova=True`, and `none` +# turns off caching entirely (string value) +# Allowed values: all, some, none +#cache_images=all + +# Compression level for images, e.g., 9 for gzip -9. Range is 1-9, 9 being most +# compressed but most CPU intensive on dom0. (integer value) +# Minimum value: 1 +# Maximum value: 9 +#image_compression_level=<None> + +# Default OS type (string value) +#default_os_type=linux + +# Time to wait for a block device to be created (integer value) +#block_device_creation_timeout=10 + +# Maximum size in bytes of kernel or ramdisk images (integer value) +#max_kernel_ramdisk_size=16777216 + +# Filter for finding the SR to be used to install guest instances on. To use +# the Local Storage in default XenServer/XCP installations set this flag to +# other-config:i18n-key=local-storage. To select an SR with a different +# matching criteria, you could set it to other-config:my_favorite_sr=true. 
On +# the other hand, to fall back on the Default SR, as displayed by XenCenter, +# set this flag to: default-sr:true (string value) +#sr_matching_filter=default-sr:true + +# Whether to use sparse_copy for copying data on a resize down (False will use +# standard dd). This speeds up resizes down considerably since large runs of +# zeros won't have to be rsynced (boolean value) +#sparse_copy=true + +# Maximum number of retries to unplug VBD. if <=0, should try once and no retry +# (integer value) +#num_vbd_unplug_retries=10 + +# Whether or not to download images via Bit Torrent. (string value) +# Allowed values: all, some, none +#torrent_images=none + +# Name of network to use for booting iPXE ISOs (string value) +#ipxe_network_name=<None> + +# URL to the iPXE boot menu (string value) +#ipxe_boot_menu_url=<None> + +# Name and optionally path of the tool used for ISO image creation (string +# value) +#ipxe_mkisofs_cmd=mkisofs + +# Number of seconds to wait for instance to go to running state (integer value) +#running_timeout=60 + +# The XenAPI VIF driver using XenServer Network APIs. (string value) +#vif_driver=nova.virt.xenapi.vif.XenAPIBridgeDriver + +# Dom0 plugin driver used to handle image uploads. (string value) +#image_upload_handler=nova.virt.xenapi.image.glance.GlanceStore + +# Number of seconds to wait for an SR to settle if the VDI does not exist when +# first introduced (integer value) +#introduce_vdi_retry_wait=20 + + +[zookeeper] + +# +# From nova +# + +# The ZooKeeper addresses for servicegroup service in the format of +# host1:port,host2:port,host3:port (string value) +#address=<None> + +# The recv_timeout parameter for the zk session (integer value) +#recv_timeout=4000 + +# The prefix used in ZooKeeper to store ephemeral nodes (string value) +#sg_prefix=/servicegroups + +# Number of seconds to wait until retrying to join the session (integer value) +#sg_retry_interval=5 + +[osapi_v3] +enabled=False diff --git a/qa/qa_scripts/openstack/fix_conf_file.sh b/qa/qa_scripts/openstack/fix_conf_file.sh new file mode 100755 index 000000000..8ccd27249 --- /dev/null +++ b/qa/qa_scripts/openstack/fix_conf_file.sh @@ -0,0 +1,28 @@ +source ./copy_func.sh +# +# Take a templated file, modify a local copy, and write it to the +# remote site. +# +# Usage: fix_conf_file <remote-site> <file-name> <remote-location> [<rbd-secret>] +# <remote-site> -- site where we want this modified file stored. +# <file-name> -- name of the remote file. +# <remote-location> -- directory where the file will be stored +# <rbd-secret> -- (optional) rbd_secret used by libvirt +# +function fix_conf_file() { + if [[ $# < 3 ]]; then + echo 'fix_conf_file: Too few parameters' + exit 1 + fi + openstack_node_local=${1} + cp files/${2}.template.conf ${2}.conf + hostname=`ssh $openstack_node_local hostname` + inet4addr=`ssh $openstack_node_local hostname -i` + sed -i s/VARHOSTNAME/$hostname/g ${2}.conf + sed -i s/VARINET4ADDR/$inet4addr/g ${2}.conf + if [[ $# == 4 ]]; then + sed -i s/RBDSECRET/${4}/g ${2}.conf + fi + copy_file ${2}.conf $openstack_node_local ${3} 0644 "root:root" + rm ${2}.conf +} diff --git a/qa/qa_scripts/openstack/image_create.sh b/qa/qa_scripts/openstack/image_create.sh new file mode 100755 index 000000000..ee7f61f3b --- /dev/null +++ b/qa/qa_scripts/openstack/image_create.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# +# Set up a vm on packstack. 
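+# A typical invocation (hostnames are placeholders), with the boot ISO already
+# at its default location:
+#   ./image_create.sh openstack1 cephmon1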
Use the iso in RHEL_ISO (defaults to home dir) +# +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} + +RHEL_ISO=${RHEL_ISO:-~/rhel-server-7.2-x86_64-boot.iso} +copy_file ${RHEL_ISO} $openstack_node . +copy_file execs/run_openstack.sh $openstack_node . 0755 +filler=`date +%s` +ssh $openstack_node ./run_openstack.sh "${openstack_node}X${filler}" rhel-server-7.2-x86_64-boot.iso +ssh $ceph_node sudo ceph df diff --git a/qa/qa_scripts/openstack/openstack.sh b/qa/qa_scripts/openstack/openstack.sh new file mode 100755 index 000000000..1c1e6c00f --- /dev/null +++ b/qa/qa_scripts/openstack/openstack.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# +# Install Openstack. +# Usage: openstack <openstack-site> <ceph-monitor> +# +# This script installs Openstack on one node, and connects it to a ceph +# cluster on another set of nodes. It is intended to run from a third +# node. +# +# Assumes a single node Openstack cluster and a single monitor ceph +# cluster. +# +# The execs directory contains scripts to be run on remote sites. +# The files directory contains files to be copied to remote sites. +# + +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} +./packstack.sh $openstack_node $ceph_node +echo 'done running packstack' +sleep 60 +./connectceph.sh $openstack_node $ceph_node +echo 'done connecting' +sleep 60 +./image_create.sh $openstack_node $ceph_node diff --git a/qa/qa_scripts/openstack/packstack.sh b/qa/qa_scripts/openstack/packstack.sh new file mode 100755 index 000000000..3f891f98c --- /dev/null +++ b/qa/qa_scripts/openstack/packstack.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# +# Install openstack by running packstack. +# +# Implements the operations in: +# https://docs.google.com/document/d/1us18KR3LuLyINgGk2rmI-SVj9UksCE7y4C2D_68Aa8o/edit?ts=56a78fcb +# +# The directory named files contains a template for the kilo.conf file used by packstack. +# +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} + +copy_file execs/openstack-preinstall.sh $openstack_node . 0777 +fix_conf_file $openstack_node kilo . +ssh $openstack_node sudo ./openstack-preinstall.sh +sleep 240 +ssh $openstack_node sudo packstack --answer-file kilo.conf diff --git a/qa/rbd/common.sh b/qa/rbd/common.sh new file mode 100644 index 000000000..232cf45ad --- /dev/null +++ b/qa/rbd/common.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +die() { + echo "$*" + exit 1 +} + +cleanup() { + rm -rf $TDIR + TDIR="" +} + +set_variables() { + # defaults + [ -z "$bindir" ] && bindir=$PWD # location of init-ceph + if [ -z "$conf" ]; then + conf="$basedir/ceph.conf" + [ -e $conf ] || conf="/etc/ceph/ceph.conf" + fi + [ -e $conf ] || die "conf file not found" + + CCONF="ceph-conf -c $conf" + + [ -z "$mnt" ] && mnt="/c" + if [ -z "$monhost" ]; then + $CCONF -t mon -i 0 'mon addr' > $TDIR/cconf_mon + if [ $? -ne 0 ]; then + $CCONF -t mon.a -i 0 'mon addr' > $TDIR/cconf_mon + [ $? 
-ne 0 ] && die "can't figure out \$monhost" + fi + read monhost < $TDIR/cconf_mon + fi + + [ -z "$imgsize" ] && imgsize=1024 + [ -z "$user" ] && user=admin + [ -z "$keyring" ] && keyring="`$CCONF keyring`" + [ -z "$secret" ] && secret="`ceph-authtool $keyring -n client.$user -p`" + + monip="`echo $monhost | sed 's/:/ /g' | awk '{print $1}'`" + monport="`echo $monhost | sed 's/:/ /g' | awk '{print $2}'`" + + [ -z "$monip" ] && die "bad mon address" + + [ -z "$monport" ] && monport=6789 + + set -e + + mydir=`hostname`_`echo $0 | sed 's/\//_/g'` + + img_name=test.`hostname`.$$ +} + +rbd_load() { + modprobe rbd +} + +rbd_create_image() { + id=$1 + rbd create $img_name.$id --size=$imgsize +} + +rbd_add() { + id=$1 + echo "$monip:$monport name=$user,secret=$secret rbd $img_name.$id" \ + > /sys/bus/rbd/add + + pushd /sys/bus/rbd/devices &> /dev/null + [ $? -eq 0 ] || die "failed to cd" + devid="" + rm -f "$TDIR/rbd_devs" + for f in *; do echo $f >> "$TDIR/rbd_devs"; done + sort -nr "$TDIR/rbd_devs" > "$TDIR/rev_rbd_devs" + while read f < "$TDIR/rev_rbd_devs"; do + read d_img_name < "$f/name" + if [ "x$d_img_name" == "x$img_name.$id" ]; then + devid=$f + break + fi + done + popd &> /dev/null + + [ "x$devid" == "x" ] && die "failed to find $img_name.$id" + + export rbd$id=$devid + while [ ! -e /dev/rbd$devid ]; do sleep 1; done +} + +rbd_test_init() { + rbd_load +} + +rbd_remove() { + echo $1 > /sys/bus/rbd/remove +} + +rbd_rm_image() { + id=$1 + rbd rm $imgname.$id +} + +TDIR=`mktemp -d` +trap cleanup INT TERM EXIT +set_variables diff --git a/qa/rbd/conf/+ b/qa/rbd/conf/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/rbd/conf/+ diff --git a/qa/rbd/conf/disable-pool-app.yaml b/qa/rbd/conf/disable-pool-app.yaml new file mode 100644 index 000000000..099532f57 --- /dev/null +++ b/qa/rbd/conf/disable-pool-app.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false diff --git a/qa/rbd/data-pool/ec.yaml b/qa/rbd/data-pool/ec.yaml new file mode 100644 index 000000000..f39a5bb4c --- /dev/null +++ b/qa/rbd/data-pool/ec.yaml @@ -0,0 +1,24 @@ +tasks: +- exec: + client.0: + - sudo ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2 + - sudo ceph osd pool create datapool 4 4 erasure teuthologyprofile + - sudo ceph osd pool set datapool allow_ec_overwrites true + - rbd pool init datapool + +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + client: + rbd default data pool: datapool + osd: # force bluestore since it's required for ec overwrites + osd objectstore: bluestore + bluestore block size: 96636764160 + enable experimental unrecoverable data corrupting features: "*" + osd debug randomize hobject sort order: false +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/rbd/data-pool/none.yaml b/qa/rbd/data-pool/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/rbd/data-pool/none.yaml diff --git a/qa/rbd/data-pool/replicated.yaml b/qa/rbd/data-pool/replicated.yaml new file mode 100644 index 000000000..c5647dba1 --- /dev/null +++ b/qa/rbd/data-pool/replicated.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + client.0: + - sudo ceph osd pool create datapool 4 + - rbd pool init datapool + +overrides: + ceph: + conf: + client: + rbd default data pool: datapool diff --git a/qa/rbd/krbd_blkroset.t b/qa/rbd/krbd_blkroset.t new file mode 100644 index 
000000000..428636de0 --- /dev/null +++ b/qa/rbd/krbd_blkroset.t @@ -0,0 +1,352 @@ + +Setup +===== + + $ RO_KEY=$(ceph auth get-or-create-key client.ro mon 'profile rbd' mgr 'profile rbd' osd 'profile rbd-read-only') + $ rbd create --size 10 img + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd create --size 1 imgpart + $ DEV=$(sudo rbd map imgpart) + $ cat <<EOF | sudo sfdisk $DEV >/dev/null 2>&1 + > unit: sectors + > /dev/rbd0p1 : start= 512, size= 512, Id=83 + > /dev/rbd0p2 : start= 1024, size= 512, Id=83 + > EOF + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress imgpart@snap + + +Image HEAD +========== + +R/W, unpartitioned: + + $ DEV=$(sudo rbd map img) + $ blockdev --getro $DEV + 0 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + $ blkdiscard $DEV + $ blockdev --setro $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setro $DEV + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw $DEV + $ blockdev --getro $DEV + 0 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + $ blkdiscard $DEV + $ sudo rbd unmap $DEV + +R/W, partitioned: + + $ DEV=$(sudo rbd map imgpart) + $ udevadm settle + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ blockdev --setro ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setro ${DEV}p1 + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ blockdev --setrw ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p1 + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map imgpart) + $ udevadm settle + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ blockdev --setro ${DEV}p2 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setro ${DEV}p2 + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw ${DEV}p2 + 
.*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p2 + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ sudo rbd unmap $DEV + +R/O, unpartitioned: + + $ DEV=$(sudo rbd map --read-only img) + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw $DEV # succeeds but effectively ignored + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + +R/O, partitioned: + + $ DEV=$(sudo rbd map --read-only imgpart) + $ udevadm settle + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p1 # succeeds but effectively ignored + $ blockdev --setrw ${DEV}p2 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p2 # succeeds but effectively ignored + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + + +Image snapshot +============== + +Unpartitioned: + + $ DEV=$(sudo rbd map img@snap) + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw $DEV # succeeds but effectively ignored + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + +Partitioned: + + $ DEV=$(sudo rbd map imgpart@snap) + $ udevadm settle + $ blockdev --getro 
${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p1 # succeeds but effectively ignored + $ blockdev --setrw ${DEV}p2 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p2 # succeeds but effectively ignored + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + + +read-only OSD caps +================== + +R/W: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) img) + rbd: sysfs write failed + rbd: map failed: (1) Operation not permitted + [1] + +R/O: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) --read-only img) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + +Snapshot: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) img@snap) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + +R/W, clone: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) cloneimg) + rbd: sysfs write failed + rbd: map failed: (1) Operation not permitted + [1] + +R/O, clone: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) --read-only cloneimg) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + +rw -> ro with open_count > 0 +============================ + + $ DEV=$(sudo rbd map img) + $ { sleep 10; sudo blockdev --setro $DEV; } & + $ dd if=/dev/urandom of=$DEV bs=1k oflag=direct status=noxfer + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1-9]\d*\+0 records in (re) + [1-9]\d*\+0 records out (re) + [1] + $ sudo rbd unmap $DEV + + +"-o rw --read-only" should result in read-only mapping +====================================================== + + $ DEV=$(sudo rbd map -o rw --read-only img) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + +Teardown +======== + + $ rbd snap purge imgpart >/dev/null 2>&1 + $ rbd rm imgpart >/dev/null 2>&1 + $ rbd rm cloneimg >/dev/null 2>&1 + $ rbd snap unprotect img@snap + $ rbd snap purge img >/dev/null 2>&1 + $ rbd rm img >/dev/null 2>&1 + diff --git a/qa/rbd/krbd_deep_flatten.t b/qa/rbd/krbd_deep_flatten.t new file mode 100644 index 000000000..486b966d9 --- /dev/null +++ b/qa/rbd/krbd_deep_flatten.t @@ -0,0 +1,329 @@ + +Write: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo 
rbd map cloneimg) + $ xfs_io -c 'pwrite -S 0xab -w 6M 1k' $DEV >/dev/null + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 abab abab abab abab abab abab abab abab + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 abab abab abab abab abab abab abab abab + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Write, whole object: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ xfs_io -d -c 'pwrite -b 4M -S 0xab 4M 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 abab abab abab abab abab abab abab abab + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 abab abab abab abab abab abab abab abab + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Zeroout: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ fallocate -z -o 6M -l 1k $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map 
cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Zeroout, whole object: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ fallocate -z -o 4M -l 4M $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Discard, whole object, empty clone: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ blkdiscard -o 4M -l 4M $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Discard, whole object, full clone: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ xfs_io -c 'pwrite -S 0xab -w 
0 12M' $DEV >/dev/null + $ blkdiscard -o 4M -l 4M $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 abab abab abab abab abab abab abab abab + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 abab abab abab abab abab abab abab abab + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg diff --git a/qa/rbd/krbd_default_map_options.t b/qa/rbd/krbd_default_map_options.t new file mode 100644 index 000000000..5dac5d6ae --- /dev/null +++ b/qa/rbd/krbd_default_map_options.t @@ -0,0 +1,64 @@ +Set up + + $ ceph osd pool create rbda + pool 'rbda' created + $ rbd pool init rbda + $ rbd create rbda/image1 --size 1000 + +Test at map options level + + $ OPTIONS="alloc_size=65536,lock_on_read" + $ EXPECTED="${OPTIONS}" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Test at global level + + $ OPTIONS="alloc_size=4096,crc" + $ EXPECTED="${OPTIONS}" + $ rbd config global set global rbd_default_map_options ${OPTIONS} + $ DEV=$(sudo rbd map rbda/image1) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + + $ OPTIONS="alloc_size=65536,lock_on_read" + $ EXPECTED="alloc_size=65536,crc,lock_on_read" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Test at pool level + + $ OPTIONS="alloc_size=8192,share" + $ EXPECTED="${OPTIONS}" + $ rbd config pool set rbda rbd_default_map_options ${OPTIONS} + $ DEV=$(sudo rbd map rbda/image1) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + + $ OPTIONS="lock_on_read,alloc_size=65536" + $ EXPECTED="alloc_size=65536,lock_on_read,share" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Test at image level + + $ OPTIONS="alloc_size=16384,tcp_nodelay" + $ EXPECTED="${OPTIONS}" + $ rbd config image set rbda/image1 rbd_default_map_options ${OPTIONS} + $ DEV=$(sudo rbd map rbda/image1) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + + $ OPTIONS="lock_on_read,alloc_size=65536" + $ EXPECTED="alloc_size=65536,lock_on_read,tcp_nodelay" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Teardown + + $ ceph osd pool rm rbda rbda --yes-i-really-really-mean-it + pool 'rbda' removed diff --git a/qa/rbd/krbd_discard.t b/qa/rbd/krbd_discard.t new file mode 100644 
index 000000000..528e1dc3d --- /dev/null +++ b/qa/rbd/krbd_discard.t @@ -0,0 +1,398 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 156672 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 64512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 163840 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0030000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd 
cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 132096 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589824 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589312 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02d0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 590336 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00a0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4193792 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4129280 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128768 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128256 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063744 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063232 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io 
-c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4062720 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 512 -l 4193792 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0010000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map cloneimg1) + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map cloneimg2) + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map cloneimg3) + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0340000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0170000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 143360 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 286720 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0008000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0014000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 001c000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0028000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 003c000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0044000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ 
blkdiscard -o 0 -l 573440 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0050000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_discard_4M.t b/qa/rbd/krbd_discard_4M.t new file mode 100644 index 000000000..7ed744c11 --- /dev/null +++ b/qa/rbd/krbd_discard_4M.t @@ -0,0 +1,330 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map -o alloc_size=4194304 img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 156672 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 64512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 163840 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ 
xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 132096 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589824 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589312 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 590336 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4193792 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03ffe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4129280 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128768 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128256 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03efe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063744 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063232 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4062720 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03dfe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 512 -l 4193792 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 
0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg1) + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg2) + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg3) + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map -o alloc_size=4194304 fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 143360 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 286720 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 573440 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_discard_512b.t b/qa/rbd/krbd_discard_512b.t new file mode 100644 index 000000000..6669ca8fc --- /dev/null +++ b/qa/rbd/krbd_discard_512b.t @@ -0,0 +1,416 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map -o alloc_size=512 img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 156672 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0026400 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0026600 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ 
blkdiscard -o 131584 -l 64512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 163840 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0028000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0038000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 
0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 132096 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589824 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589312 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 008fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02dfe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 590336 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4193792 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03ffe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4129280 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128768 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128256 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03efe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063744 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063232 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4062720 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03dfe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 512 -l 4193792 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + 
$ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg1) + $ blkdiscard -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg2) + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg3) + $ blkdiscard -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0348000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map -o alloc_size=512 fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 143360 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0023000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 286720 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0046000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 573440 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 008c000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_discard_granularity.t b/qa/rbd/krbd_discard_granularity.t new file mode 100644 index 000000000..844643bae --- /dev/null +++ b/qa/rbd/krbd_discard_granularity.t @@ -0,0 +1,40 @@ + + $ rbd create --size 20M img + + $ DEV=$(sudo rbd map img) + $ blockdev --getiomin $DEV + 65536 + $ blockdev --getioopt $DEV + 65536 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 65536 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map -o alloc_size=512 img) + $ blockdev --getiomin $DEV + 512 + $ blockdev --getioopt $DEV + 512 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 512 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map -o alloc_size=4194304 img) + $ blockdev --getiomin $DEV + 4194304 + $ blockdev --getioopt $DEV + 4194304 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 4194304 + $ sudo rbd unmap $DEV + 
+ $ DEV=$(sudo rbd map -o alloc_size=8388608 img) + $ blockdev --getiomin $DEV + 4194304 + $ blockdev --getioopt $DEV + 4194304 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 4194304 + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_get_features.t b/qa/rbd/krbd_get_features.t new file mode 100644 index 000000000..0c26e9c33 --- /dev/null +++ b/qa/rbd/krbd_get_features.t @@ -0,0 +1,31 @@ + +journaling makes the image only unwritable, rather than both unreadable +and unwritable: + + $ rbd create --size 1 --image-feature layering,exclusive-lock,journaling img + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone --image-feature layering,exclusive-lock,journaling img@snap cloneimg + + $ DEV=$(sudo rbd map img) + rbd: sysfs write failed + rbd: map failed: (6) No such device or address + [6] + $ DEV=$(sudo rbd map --read-only img) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + rbd: sysfs write failed + rbd: map failed: (6) No such device or address + [6] + $ DEV=$(sudo rbd map --read-only cloneimg) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_huge_image.t b/qa/rbd/krbd_huge_image.t new file mode 100644 index 000000000..e0ce6a954 --- /dev/null +++ b/qa/rbd/krbd_huge_image.t @@ -0,0 +1,41 @@ + + $ get_field() { + > rbd info --format=json $1 | python3 -c "import sys, json; print(json.load(sys.stdin)['$2'])" + > } + +Write to first and last sectors and make sure we hit the right objects: + + $ ceph osd pool create hugeimg 12 >/dev/null 2>&1 + $ rbd pool init hugeimg + $ rbd create --size 4E --object-size 4K --image-feature layering hugeimg/img + $ DEV=$(sudo rbd map hugeimg/img) + $ xfs_io -c 'pwrite 0 512' $DEV >/dev/null # first sector + $ xfs_io -c 'pwrite 4611686018427387392 512' $DEV >/dev/null # last sector + $ sudo rbd unmap $DEV + + $ get_field hugeimg/img size + 4611686018427387904 + $ get_field hugeimg/img objects + 1125899906842624 + $ rados -p hugeimg ls | grep $(get_field hugeimg/img block_name_prefix) | sort + .*\.0000000000000000 (re) + .*\.0003ffffffffffff (re) + +Dump first and last megabytes: + + $ DEV=$(sudo rbd map hugeimg/img) + $ dd if=$DEV bs=1M count=1 status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0100000 + $ dd if=$DEV bs=1M skip=4398046511103 status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 00ffe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100000 + $ sudo rbd unmap $DEV + + $ ceph osd pool delete hugeimg hugeimg --yes-i-really-really-mean-it >/dev/null 2>&1 diff --git a/qa/rbd/krbd_modprobe.t b/qa/rbd/krbd_modprobe.t new file mode 100644 index 000000000..a0e04d394 --- /dev/null +++ b/qa/rbd/krbd_modprobe.t @@ -0,0 +1,10 @@ + + $ sudo modprobe -r rbd + $ sudo modprobe -r libceph + $ lsmod | grep libceph + [1] + $ rbd create --size 1 img + $ DEV=$(sudo rbd map img) + $ sudo grep -q ',key=' /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_msgr_segments.t b/qa/rbd/krbd_msgr_segments.t new file mode 100644 index 000000000..b89a921a1 --- /dev/null +++ b/qa/rbd/krbd_msgr_segments.t @@ -0,0 +1,85 @@ + + $ get_block_name_prefix() { + > rbd info --format=json $1 | python3 -c "import sys, json; 
print(json.load(sys.stdin)['block_name_prefix'])" + > } + +Short segments: + + $ rbd create --size 12M img + $ DEV=$(sudo rbd map img) + $ xfs_io -d -c 'pwrite 5120 512' $DEV >/dev/null + $ xfs_io -d -c 'pwrite 12577280 512' $DEV >/dev/null + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0001400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0001600 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0bfea00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0bfec00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img + +Short segment, ceph_msg_data_bio_cursor_init(): + + $ rbd create --size 12M img + $ DEV=$(sudo rbd map img) + $ xfs_io -d -c 'pwrite 0 512' $DEV >/dev/null + $ rados -p rbd stat $(get_block_name_prefix img).0000000000000000 + .* size 512 (re) + $ xfs_io -d -c 'pread -b 2M 0 2M' $DEV >/dev/null + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img + +Short segment, ceph_msg_data_bio_advance(): + + $ rbd create --size 12M img + $ DEV=$(sudo rbd map img) + $ xfs_io -d -c 'pwrite 0 1049088' $DEV >/dev/null + $ rados -p rbd stat $(get_block_name_prefix img).0000000000000000 + .* size 1049088 (re) + $ xfs_io -d -c 'pread -b 2M 0 2M' $DEV >/dev/null + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img + +Cloned bios (dm-snapshot.ko, based on generic/081): + + $ rbd create --size 300M img + $ DEV=$(sudo rbd map img) + $ sudo vgcreate vg_img $DEV + Physical volume "/dev/rbd?" successfully created* (glob) + Volume group "vg_img" successfully created + $ sudo lvcreate -L 256M -n lv_img vg_img + Logical volume "lv_img" created. + $ udevadm settle + $ sudo mkfs.ext4 -q /dev/mapper/vg_img-lv_img + $ sudo lvcreate -L 4M --snapshot -n lv_snap vg_img/lv_img | grep created + Logical volume "lv_snap" created. + $ udevadm settle + $ sudo mount /dev/mapper/vg_img-lv_snap /mnt + $ sudo xfs_io -f -c 'pwrite 0 5M' /mnt/file1 >/dev/null + $ sudo umount /mnt + $ sudo vgremove -f vg_img + Logical volume "lv_snap" successfully removed + Logical volume "lv_img" successfully removed + Volume group "vg_img" successfully removed + $ sudo pvremove $DEV + Labels on physical volume "/dev/rbd?" 
successfully wiped* (glob) + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_parent_overlap.t b/qa/rbd/krbd_parent_overlap.t new file mode 100644 index 000000000..3489d83db --- /dev/null +++ b/qa/rbd/krbd_parent_overlap.t @@ -0,0 +1,69 @@ + +For reads, only the object extent needs to be reverse mapped: + + $ rbd create --size 5M img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite 0 5M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd resize --no-progress --size 12M cloneimg + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0500000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0500000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + +For writes, the entire object needs to be reverse mapped: + + $ rbd create --size 2M img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite 0 1M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd resize --no-progress --size 8M cloneimg + $ DEV=$(sudo rbd map cloneimg) + $ xfs_io -c 'pwrite -S 0xef 3M 1M' $DEV >/dev/null + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0300000 efef efef efef efef efef efef efef efef + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0300000 efef efef efef efef efef efef efef efef + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_whole_object_zeroout.t b/qa/rbd/krbd_whole_object_zeroout.t new file mode 100644 index 000000000..850c8c968 --- /dev/null +++ b/qa/rbd/krbd_whole_object_zeroout.t @@ -0,0 +1,143 @@ + + $ get_block_name_prefix() { + > rbd info --format=json $1 | python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'])" + > } + + $ rbd create --size 200M img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -b 4M 0 200M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + +cloneimg1: +1 object in an object set, 4M +25 full object sets +25 objects in total + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map cloneimg1) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg1) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + +cloneimg2: +7 objects in an object set, 28M +3 full object sets +min((100M % 28M) / 512K, 7) = 7 objects in the last object 
set +28 objects in total + + $ rbd clone --stripe-unit 512K --stripe-count 7 img@snap cloneimg2 + $ DEV=$(sudo rbd map cloneimg2) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg2) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + +cloneimg3: +23 objects in an object set, 92M +1 full object set +min((100M % 92M) / 512K, 23) = 16 objects in the last object set +39 objects in total + + $ rbd clone --stripe-unit 512K --stripe-count 23 img@snap cloneimg3 + $ DEV=$(sudo rbd map cloneimg3) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg3) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + +cloneimg4: +65 objects in an object set, 260M +0 full object sets +min((100M % 260M) / 512K, 65) = 65 objects in the last object set +65 objects in total + + $ rbd clone --stripe-unit 512K --stripe-count 65 img@snap cloneimg4 + $ DEV=$(sudo rbd map cloneimg4) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg4) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg1) + 25 + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg2) + 28 + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg3) + 39 + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg4) + 65 + + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_zeroout.t b/qa/rbd/krbd_zeroout.t new file mode 100644 index 000000000..10bb230e8 --- /dev/null +++ b/qa/rbd/krbd_zeroout.t @@ -0,0 +1,422 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 156672 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0026400 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0026600 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 64512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 
0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 130560 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 130560 -l 66560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 163840 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0028000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0038000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 130048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 130560 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null 
+ $ fallocate -z -o 130560 -l 132096 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 589824 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 589312 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 008fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02dfe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 590336 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4193792 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03ffe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4129280 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4128768 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4128256 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03efe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4063744 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4063232 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4062720 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03dfe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 512 -l 4193792 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map cloneimg1) + $ fallocate -z -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0348000 abab abab abab abab abab abab abab abab + * 
+ 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map cloneimg2) + $ fallocate -z -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map cloneimg3) + $ fallocate -z -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0348000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ fallocate -z -o 0 -l 143360 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0023000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ fallocate -z -o 0 -l 286720 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0046000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ fallocate -z -o 0 -l 573440 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 008c000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/rbd.sh b/qa/rbd/rbd.sh new file mode 100755 index 000000000..2b7ce8ee2 --- /dev/null +++ b/qa/rbd/rbd.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -x + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +. 
$basedir/common.sh + +rbd_test_init + + +create_multiple() { + for i in `seq 1 10`; do + rbd_create_image $i + done + + for i in `seq 1 10`; do + rbd_add $i + done + for i in `seq 1 10`; do + devname=/dev/rbd`eval echo \\$rbd$i` + echo $devname + done + for i in `seq 1 10`; do + devid=`eval echo \\$rbd$i` + rbd_remove $devid + done + for i in `seq 1 10`; do + rbd_rm_image $i + done +} + +test_dbench() { + rbd_create_image 0 + rbd_add 0 + + devname=/dev/rbd$rbd0 + + mkfs -t ext3 $devname + mount -t ext3 $devname $mnt + + dbench -D $mnt -t 30 5 + sync + + umount $mnt + rbd_remove $rbd0 + rbd_rm_image 0 +} + +create_multiple +test_dbench + diff --git a/qa/releases/infernalis.yaml b/qa/releases/infernalis.yaml new file mode 100644 index 000000000..f21e7fe8a --- /dev/null +++ b/qa/releases/infernalis.yaml @@ -0,0 +1,5 @@ +tasks: +- exec: + osd.0: + - ceph osd set sortbitwise + - for p in `ceph osd pool ls` ; do ceph osd pool set $p use_gmt_hitset true ; done diff --git a/qa/releases/jewel.yaml b/qa/releases/jewel.yaml new file mode 100644 index 000000000..ab09c083e --- /dev/null +++ b/qa/releases/jewel.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd set sortbitwise + - ceph osd set require_jewel_osds + - for p in `ceph osd pool ls` ; do ceph osd pool set $p use_gmt_hitset true ; done diff --git a/qa/releases/kraken.yaml b/qa/releases/kraken.yaml new file mode 100644 index 000000000..57342057a --- /dev/null +++ b/qa/releases/kraken.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + osd.0: + - ceph osd set require_kraken_osds diff --git a/qa/releases/luminous-with-mgr.yaml b/qa/releases/luminous-with-mgr.yaml new file mode 100644 index 000000000..ea3130768 --- /dev/null +++ b/qa/releases/luminous-with-mgr.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release luminous + - ceph osd set-require-min-compat-client luminous +- ceph.healthy: +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false diff --git a/qa/releases/luminous.yaml b/qa/releases/luminous.yaml new file mode 100644 index 000000000..768861c21 --- /dev/null +++ b/qa/releases/luminous.yaml @@ -0,0 +1,21 @@ +tasks: +- exec: + mgr.x: + - mkdir -p /var/lib/ceph/mgr/ceph-x + - ceph auth get-or-create-key mgr.x mon 'allow profile mgr' + - ceph auth export mgr.x > /var/lib/ceph/mgr/ceph-x/keyring +- ceph.restart: + daemons: [mgr.x] + wait-for-healthy: false +- exec: + osd.0: + - ceph osd require-osd-release luminous + - ceph osd set-require-min-compat-client luminous +- ceph.healthy: +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false + log-ignorelist: + - no active mgr diff --git a/qa/releases/mimic.yaml b/qa/releases/mimic.yaml new file mode 100644 index 000000000..f901e7eda --- /dev/null +++ b/qa/releases/mimic.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release mimic + - ceph osd set-require-min-compat-client mimic +- ceph.healthy: diff --git a/qa/releases/nautilus.yaml b/qa/releases/nautilus.yaml new file mode 100644 index 000000000..dd650f173 --- /dev/null +++ b/qa/releases/nautilus.yaml @@ -0,0 +1,7 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release nautilus + - ceph osd set-require-min-compat-client nautilus + - for p in `ceph osd pool ls`; do ceph osd pool set $p pg_autoscale_mode off; done +- ceph.healthy: diff --git a/qa/releases/octopus.yaml b/qa/releases/octopus.yaml new file mode 100644 index 000000000..935603395 --- /dev/null +++ b/qa/releases/octopus.yaml @@ -0,0 +1,7 @@ +tasks: +- exec: + osd.0: + - ceph 
osd require-osd-release octopus + - ceph osd set-require-min-compat-client octopus + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: diff --git a/qa/releases/pacific-from-o.yaml b/qa/releases/pacific-from-o.yaml new file mode 100644 index 000000000..a5eabff35 --- /dev/null +++ b/qa/releases/pacific-from-o.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release pacific + - ceph osd set-require-min-compat-client pacific +- ceph.healthy: diff --git a/qa/releases/pacific.yaml b/qa/releases/pacific.yaml new file mode 100644 index 000000000..fb5c39a08 --- /dev/null +++ b/qa/releases/pacific.yaml @@ -0,0 +1,7 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release pacific + - ceph osd set-require-min-compat-client pacific + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: diff --git a/qa/releases/quincy.yaml b/qa/releases/quincy.yaml new file mode 100644 index 000000000..731c6bead --- /dev/null +++ b/qa/releases/quincy.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release quincy + - ceph osd set-require-min-compat-client quincy +- ceph.healthy: diff --git a/qa/releases/reef.yaml b/qa/releases/reef.yaml new file mode 100644 index 000000000..a64b2fb9f --- /dev/null +++ b/qa/releases/reef.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release reef + - ceph osd set-require-min-compat-client reef +- ceph.healthy: diff --git a/qa/rgw/ignore-pg-availability.yaml b/qa/rgw/ignore-pg-availability.yaml new file mode 100644 index 000000000..732e40306 --- /dev/null +++ b/qa/rgw/ignore-pg-availability.yaml @@ -0,0 +1,11 @@ +# https://tracker.ceph.com/issues/45802 +# https://tracker.ceph.com/issues/51282 +# https://tracker.ceph.com/issues/61168 +# https://tracker.ceph.com/issues/62504 +overrides: + ceph: + log-ignorelist: + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + - \(POOL_APP_NOT_ENABLED\) + - not have an application enabled diff --git a/qa/rgw/s3tests-branch.yaml b/qa/rgw/s3tests-branch.yaml new file mode 100644 index 000000000..10a5e9374 --- /dev/null +++ b/qa/rgw/s3tests-branch.yaml @@ -0,0 +1,4 @@ +overrides: + s3tests: + force-branch: ceph-reef + # git_remote: https://github.com/ceph/ diff --git a/qa/rgw_bucket_sharding/default.yaml b/qa/rgw_bucket_sharding/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/rgw_bucket_sharding/default.yaml diff --git a/qa/rgw_bucket_sharding/single.yaml b/qa/rgw_bucket_sharding/single.yaml new file mode 100644 index 000000000..c6b85f7ce --- /dev/null +++ b/qa/rgw_bucket_sharding/single.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + rgw override bucket index max shards: 1 diff --git a/qa/rgw_frontend/beast.yaml b/qa/rgw_frontend/beast.yaml new file mode 100644 index 000000000..369e65f7c --- /dev/null +++ b/qa/rgw_frontend/beast.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: beast diff --git a/qa/rgw_pool_type/ec-profile.yaml b/qa/rgw_pool_type/ec-profile.yaml new file mode 100644 index 000000000..05384cb53 --- /dev/null +++ b/qa/rgw_pool_type/ec-profile.yaml @@ -0,0 +1,10 @@ +overrides: + rgw: + ec-data-pool: true + erasure_code_profile: + name: testprofile + k: 3 + m: 1 + crush-failure-domain: osd + s3tests: + slow_backend: true diff --git a/qa/rgw_pool_type/ec.yaml b/qa/rgw_pool_type/ec.yaml new file mode 100644 index 000000000..7c99b7f85 --- /dev/null +++ b/qa/rgw_pool_type/ec.yaml @@ -0,0 +1,5 @@ +overrides: + rgw: + ec-data-pool: 
true + s3tests: + slow_backend: true diff --git a/qa/rgw_pool_type/replicated.yaml b/qa/rgw_pool_type/replicated.yaml new file mode 100644 index 000000000..c91709eaa --- /dev/null +++ b/qa/rgw_pool_type/replicated.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + ec-data-pool: false diff --git a/qa/run-standalone.sh b/qa/run-standalone.sh new file mode 100755 index 000000000..9daaabfa1 --- /dev/null +++ b/qa/run-standalone.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +set -e + +if [ ! -e CMakeCache.txt -o ! -d bin ]; then + echo 'run this from the build dir' + exit 1 +fi + +function get_cmake_variable() { + local variable=$1 + grep "$variable" CMakeCache.txt | cut -d "=" -f 2 +} + +function get_python_path() { + python_common=$(realpath ../src/python-common) + echo $(realpath ../src/pybind):$(pwd)/lib/cython_modules/lib.3:$python_common +} + +if [ `uname` = FreeBSD ]; then + # otherwise module prettytable will not be found + export PYTHONPATH=$(get_python_path):/usr/local/lib/python3.6/site-packages + exec_mode=+111 + KERNCORE="kern.corefile" + COREPATTERN="core.%N.%P" +else + export PYTHONPATH=$(get_python_path) + exec_mode=/111 + KERNCORE="kernel.core_pattern" + COREPATTERN="core.%e.%p.%t" +fi + +function cleanup() { + if [ -n "$precore" ]; then + sudo sysctl -w "${KERNCORE}=${precore}" + fi +} + +function finish() { + cleanup + exit 0 +} + +trap finish TERM HUP INT + +PATH=$(pwd)/bin:$PATH + +# add /sbin and /usr/sbin to PATH to find sysctl in those cases where the +# user's PATH does not get these directories by default (e.g., tumbleweed) +PATH=$PATH:/sbin:/usr/sbin + +export LD_LIBRARY_PATH="$(pwd)/lib" + +# TODO: Use getops +dryrun=false +if [[ "$1" = "--dry-run" ]]; then + dryrun=true + shift +fi + +all=false +if [ "$1" = "" ]; then + all=true +fi + +select=("$@") + +location="../qa/standalone" + +count=0 +errors=0 +userargs="" +precore="$(sysctl -n $KERNCORE)" +# If corepattern already set, avoid having to use sudo +if [ "$precore" = "$COREPATTERN" ]; then + precore="" +else + sudo sysctl -w "${KERNCORE}=${COREPATTERN}" +fi +# Clean out any cores in core target directory (currently .) +if ls $(dirname $(sysctl -n $KERNCORE)) | grep -q '^core\|core$' ; then + mkdir found.cores.$$ 2> /dev/null || true + for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do + mv $i found.cores.$$ + done + echo "Stray cores put in $(pwd)/found.cores.$$" +fi + +ulimit -c unlimited +for f in $(cd $location ; find . -mindepth 2 -perm $exec_mode -type f) +do + f=$(echo $f | sed 's/\.\///') + if [[ "$all" = "false" ]]; then + found=false + for c in "${!select[@]}" + do + # Get command and any arguments of subset of tests to run + allargs="${select[$c]}" + arg1=$(echo "$allargs" | cut --delimiter " " --field 1) + # Get user args for this selection for use below + userargs="$(echo $allargs | cut -s --delimiter " " --field 2-)" + if [[ "$arg1" = $(basename $f) ]] || [[ "$arg1" = $(dirname $f) ]]; then + found=true + break + fi + if [[ "$arg1" = "$f" ]]; then + found=true + break + fi + done + if [[ "$found" = "false" ]]; then + continue + fi + fi + # Don't run test-failure.sh unless explicitly specified + if [ "$all" = "true" -a "$f" = "special/test-failure.sh" ]; then + continue + fi + + cmd="$location/$f $userargs" + count=$(expr $count + 1) + echo "--- $cmd ---" + if [[ "$dryrun" != "true" ]]; then + if ! PATH=$PATH:bin \ + CEPH_ROOT=.. \ + CEPH_LIB=lib \ + LOCALRUN=yes \ + time -f "Elapsed %E (%e seconds)" $cmd ; then + echo "$f .............. 
FAILED" + errors=$(expr $errors + 1) + fi + fi +done +cleanup + +if [ "$errors" != "0" ]; then + echo "$errors TESTS FAILED, $count TOTAL TESTS" + exit 1 +fi + +echo "ALL $count TESTS PASSED" +exit 0 diff --git a/qa/run_xfstests-obsolete.sh b/qa/run_xfstests-obsolete.sh new file mode 100644 index 000000000..48afff879 --- /dev/null +++ b/qa/run_xfstests-obsolete.sh @@ -0,0 +1,458 @@ +#!/usr/bin/env bash + +# Copyright (C) 2012 Dreamhost, LLC +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. + +# Usage: +# run_xfs_tests -t /dev/<testdev> -s /dev/<scratchdev> -f <fstype> <tests> +# - test device and scratch device will both get trashed +# - fstypes can be xfs, ext4, or btrfs (xfs default) +# - tests can be listed individually or in ranges: 1 3-5 8 +# tests can also be specified by group: -g quick +# +# Exit status: +# 0: success +# 1: usage error +# 2: other runtime error +# 99: argument count error (programming error) +# 100: getopt error (internal error) + +# Alex Elder <elder@dreamhost.com> +# April 13, 2012 + +set -e + +PROGNAME=$(basename $0) + +# xfstests is downloaded from this git repository and then built. +# XFSTESTS_REPO="git://oss.sgi.com/xfs/cmds/xfstests.git" +XFSTESTS_REPO="git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git" + +# Default command line option values +COUNT="1" +FS_TYPE="xfs" +SCRATCH_DEV="" # MUST BE SPECIFIED +TEST_DEV="" # MUST BE SPECIFIED +TESTS="-g auto" # The "auto" group is supposed to be "known good" + +# rbd presents geometry information that causes mkfs.xfs to +# issue a warning. This option avoids this class of problems. +XFS_MKFS_OPTIONS="-l su=32k" + +# Override the default test list with a list of tests known to pass +# until we can work through getting them all passing reliably. +TESTS="1-7 9 11-15 17 19-21 26-29 31-34 41 46-48 50-54 56 61 63-67 69-70 74-76" +TESTS="${TESTS} 78 79 84-89 91-92 100 103 105 108 110 116-121 124 126" +TESTS="${TESTS} 129-135 137-141 164-167 182 184 187-190 192 194" +TESTS="${TESTS} 196 199 201 203 214-216 220-227 234 236-238 241 243-249" +TESTS="${TESTS} 253 257-259 261 262 269 273 275 277 278 280 285 286" +# 275 was the highest available test as of 4/10/12. +# 289 was the highest available test as of 11/15/12. + +###### +# Some explanation of why tests have been excluded above: +# +# Test 008 was pulled because it contained a race condition leading to +# spurious failures. +# +# Test 049 was pulled because it caused a kernel fault. 
+# http://tracker.newdream.net/issues/2260 +# Test 232 was pulled because it caused an XFS error +# http://tracker.newdream.net/issues/2302 +# +# This test passes but takes a LONG time (1+ hours): 127 +# +# These were not run for one (anticipated) reason or another: +# 010 016 030 035 040 044 057 058-060 072 077 090 093-095 097-099 104 +# 112 113 122 123 125 128 142 147-163 168 175-178 180 185 191 193 +# 195 197 198 207-213 217 228 230-233 235 239 240 252 254 255 264-266 +# 270-272 276 278-279 281-284 288 289 +# +# These tests all failed (produced output different from golden): +# 042 073 083 096 109 169 170 200 202 204-206 218 229 240 242 250 +# 263 276 277 279 287 +# +# The rest were not part of the "auto" group: +# 018 022 023 024 025 036 037 038 039 043 055 071 080 081 082 101 +# 102 106 107 111 114 115 136 171 172 173 251 267 268 +###### + +# print an error message and quit with non-zero status +function err() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "${PROGNAME}: ${FUNCNAME[1]}: $@" >&2 + fi + exit 2 +} + +# routine used to validate argument counts to all shell functions +function arg_count() { + local func + local want + local got + + if [ $# -eq 2 ]; then + func="${FUNCNAME[1]}" # calling function + want=$1 + got=$2 + else + func="${FUNCNAME[0]}" # i.e., arg_count + want=2 + got=$# + fi + [ "${want}" -eq "${got}" ] && return 0 + echo "${PROGNAME}: ${func}: arg count bad (want ${want} got ${got})" >&2 + exit 99 +} + +# validation function for repeat count argument +function count_valid() { + arg_count 1 $# + + test "$1" -gt 0 # 0 is pointless; negative is wrong +} + +# validation function for filesystem type argument +function fs_type_valid() { + arg_count 1 $# + + case "$1" in + xfs|ext4|btrfs) return 0 ;; + *) return 1 ;; + esac +} + +# validation function for device arguments +function device_valid() { + arg_count 1 $# + + # Very simple testing--really should try to be more careful... 
+ test -b "$1" +} + +# print a usage message and quit +# +# if a message is supplied, print that first, and then exit +# with non-zero status +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "$@" >&2 + fi + + echo "" >&2 + echo "Usage: ${PROGNAME} <options> <tests>" >&2 + echo "" >&2 + echo " options:" >&2 + echo " -h or --help" >&2 + echo " show this message" >&2 + echo " -c or --count" >&2 + echo " iteration count (1 or more)" >&2 + echo " -f or --fs-type" >&2 + echo " one of: xfs, ext4, btrfs" >&2 + echo " (default fs-type: xfs)" >&2 + echo " -s or --scratch-dev (REQUIRED)" >&2 + echo " name of device used for scratch filesystem" >&2 + echo " -t or --test-dev (REQUIRED)" >&2 + echo " name of device used for test filesystem" >&2 + echo " tests:" >&2 + echo " list of test numbers or ranges, e.g.:" >&2 + echo " 1-9 11-15 17 19-21 26-28 31-34 41" >&2 + echo " or possibly an xfstests test group, e.g.:" >&2 + echo " -g quick" >&2 + echo " (default tests: -g auto)" >&2 + echo "" >&2 + + [ $# -gt 0 ] && exit 1 + + exit 0 # This is used for a --help +} + +# parse command line arguments +function parseargs() { + # Short option flags + SHORT_OPTS="" + SHORT_OPTS="${SHORT_OPTS},h" + SHORT_OPTS="${SHORT_OPTS},c:" + SHORT_OPTS="${SHORT_OPTS},f:" + SHORT_OPTS="${SHORT_OPTS},s:" + SHORT_OPTS="${SHORT_OPTS},t:" + + # Short option flags + LONG_OPTS="" + LONG_OPTS="${LONG_OPTS},help" + LONG_OPTS="${LONG_OPTS},count:" + LONG_OPTS="${LONG_OPTS},fs-type:" + LONG_OPTS="${LONG_OPTS},scratch-dev:" + LONG_OPTS="${LONG_OPTS},test-dev:" + + TEMP=$(getopt --name "${PROGNAME}" \ + --options "${SHORT_OPTS}" \ + --longoptions "${LONG_OPTS}" \ + -- "$@") + eval set -- "$TEMP" + + while [ "$1" != "--" ]; do + case "$1" in + -h|--help) + usage + ;; + -c|--count) + count_valid "$2" || + usage "invalid count '$2'" + COUNT="$2" + shift + ;; + -f|--fs-type) + fs_type_valid "$2" || + usage "invalid fs_type '$2'" + FS_TYPE="$2" + shift + ;; + -s|--scratch-dev) + device_valid "$2" || + usage "invalid scratch-dev '$2'" + SCRATCH_DEV="$2" + shift + ;; + -t|--test-dev) + device_valid "$2" || + usage "invalid test-dev '$2'" + TEST_DEV="$2" + shift + ;; + *) + exit 100 # Internal error + ;; + esac + shift + done + shift + + [ -n "${TEST_DEV}" ] || usage "test-dev must be supplied" + [ -n "${SCRATCH_DEV}" ] || usage "scratch-dev must be supplied" + + [ $# -eq 0 ] || TESTS="$@" +} + +################################################################ + +[ -z "$TESTDIR" ] && export TESTDIR="/tmp/cephtest" + +# Set up some environment for normal teuthology test setup. +# This really should not be necessary but I found it was. 
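# CEPH_ARGS is read by the ceph and rados command-line tools and treated as
# extra default arguments, so with the exports below every CLI call picks up
# the test cluster's conf, keyring and client name without passing them
# explicitly.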
+export CEPH_ARGS="--conf ${TESTDIR}/ceph.conf" +export CEPH_ARGS="${CEPH_ARGS} --keyring ${TESTDIR}/data/client.0.keyring" +export CEPH_ARGS="${CEPH_ARGS} --name client.0" + +export LD_LIBRARY_PATH="${TESTDIR}/binary/usr/local/lib:${LD_LIBRARY_PATH}" +export PATH="${TESTDIR}/binary/usr/local/bin:${PATH}" +export PATH="${TESTDIR}/binary/usr/local/sbin:${PATH}" + +################################################################ + +# Filesystem-specific mkfs options--set if not supplied +export XFS_MKFS_OPTIONS="${XFS_MKFS_OPTIONS:--f -l su=65536}" +export EXT4_MKFS_OPTIONS="${EXT4_MKFS_OPTIONS:--F}" +export BTRFS_MKFS_OPTION # No defaults + +XFSTESTS_DIR="/var/lib/xfstests" # Where the tests live + +# download, build, and install xfstests +function install_xfstests() { + arg_count 0 $# + + local multiple="" + local ncpu + + pushd "${TESTDIR}" + + git clone "${XFSTESTS_REPO}" + + cd xfstests-dev + + # FIXME: use an older version before the tests were rearranged! + git reset --hard e5f1a13792f20cfac097fef98007610b422f2cac + + ncpu=$(getconf _NPROCESSORS_ONLN 2>&1) + [ -n "${ncpu}" -a "${ncpu}" -gt 1 ] && multiple="-j ${ncpu}" + + make realclean + make ${multiple} + make -k install + + popd +} + +# remove previously-installed xfstests files +function remove_xfstests() { + arg_count 0 $# + + rm -rf "${TESTDIR}/xfstests-dev" + rm -rf "${XFSTESTS_DIR}" +} + +# create a host options file that uses the specified devices +function setup_host_options() { + arg_count 0 $# + + # Create mount points for the test and scratch filesystems + local test_dir="$(mktemp -d ${TESTDIR}/test_dir.XXXXXXXXXX)" + local scratch_dir="$(mktemp -d ${TESTDIR}/scratch_mnt.XXXXXXXXXX)" + + # Write a host options file that uses these devices. + # xfstests uses the file defined by HOST_OPTIONS as the + # place to get configuration variables for its run, and + # all (or most) of the variables set here are required. + export HOST_OPTIONS="$(mktemp ${TESTDIR}/host_options.XXXXXXXXXX)" + cat > "${HOST_OPTIONS}" <<-! + # Created by ${PROGNAME} on $(date) + # HOST_OPTIONS="${HOST_OPTIONS}" + TEST_DEV="${TEST_DEV}" + SCRATCH_DEV="${SCRATCH_DEV}" + TEST_DIR="${test_dir}" + SCRATCH_MNT="${scratch_dir}" + FSTYP="${FS_TYPE}" + export TEST_DEV SCRATCH_DEV TEST_DIR SCRATCH_MNT FSTYP + # + export XFS_MKFS_OPTIONS="${XFS_MKFS_OPTIONS}" + ! + + # Now ensure we are using the same values + . "${HOST_OPTIONS}" +} + +# remove the host options file, plus the directories it refers to +function cleanup_host_options() { + arg_count 0 $# + + rm -rf "${TEST_DIR}" "${SCRATCH_MNT}" + rm -f "${HOST_OPTIONS}" +} + +# run mkfs on the given device using the specified filesystem type +function do_mkfs() { + arg_count 1 $# + + local dev="${1}" + local options + + case "${FSTYP}" in + xfs) options="${XFS_MKFS_OPTIONS}" ;; + ext4) options="${EXT4_MKFS_OPTIONS}" ;; + btrfs) options="${BTRFS_MKFS_OPTIONS}" ;; + esac + + "mkfs.${FSTYP}" ${options} "${dev}" || + err "unable to make ${FSTYP} file system on device \"${dev}\"" +} + +# mount the given device on the given mount point +function do_mount() { + arg_count 2 $# + + local dev="${1}" + local dir="${2}" + + mount "${dev}" "${dir}" || + err "unable to mount file system \"${dev}\" on \"${dir}\"" +} + +# unmount a previously-mounted device +function do_umount() { + arg_count 1 $# + + local dev="${1}" + + if mount | grep "${dev}" > /dev/null; then + if ! 
umount "${dev}"; then + err "unable to unmount device \"${dev}\"" + fi + else + # Report it but don't error out + echo "device \"${dev}\" was not mounted" >&2 + fi +} + +# do basic xfstests setup--make and mount the test and scratch filesystems +function setup_xfstests() { + arg_count 0 $# + + # TEST_DEV can persist across test runs, but for now we + # don't bother. I believe xfstests prefers its devices to + # have been already been formatted for the desired + # filesystem type--it uses blkid to identify things or + # something. So we mkfs both here for a fresh start. + do_mkfs "${TEST_DEV}" + do_mkfs "${SCRATCH_DEV}" + + # I believe the test device is expected to be mounted; the + # scratch doesn't need to be (but it doesn't hurt). + do_mount "${TEST_DEV}" "${TEST_DIR}" + do_mount "${SCRATCH_DEV}" "${SCRATCH_MNT}" +} + +# clean up changes made by setup_xfstests +function cleanup_xfstests() { + arg_count 0 $# + + # Unmount these in case a test left them mounted (plus + # the corresponding setup function mounted them...) + do_umount "${TEST_DEV}" + do_umount "${SCRATCH_DEV}" +} + +# top-level setup routine +function setup() { + arg_count 0 $# + + setup_host_options + install_xfstests + setup_xfstests +} + +# top-level (final) cleanup routine +function cleanup() { + arg_count 0 $# + + cd / + cleanup_xfstests + remove_xfstests + cleanup_host_options +} +trap cleanup EXIT ERR HUP INT QUIT + +# ################################################################ + +start_date="$(date)" + +parseargs "$@" + +setup + +pushd "${XFSTESTS_DIR}" +for (( i = 1 ; i <= "${COUNT}" ; i++ )); do + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" starting at: $(date)" + + ./check ${TESTS} # Here we actually run the tests + status=$? + + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" complete at: $(date)" +done +popd + +# cleanup is called via the trap call, above + +echo "This xfstests run started at: ${start_date}" +echo "xfstests run completed at: $(date)" +[ "${COUNT}" -gt 1 ] && echo "xfstests run consisted of ${COUNT} iterations" + +exit "${status}" diff --git a/qa/run_xfstests.sh b/qa/run_xfstests.sh new file mode 100755 index 000000000..70e494331 --- /dev/null +++ b/qa/run_xfstests.sh @@ -0,0 +1,323 @@ +#!/usr/bin/env bash + +# Copyright (C) 2012 Dreamhost, LLC +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. 
+ +# Usage: +# run_xfstests -t /dev/<testdev> -s /dev/<scratchdev> [-f <fstype>] -- <tests> +# - test device and scratch device will both get trashed +# - fstypes can be xfs, ext4, or btrfs (xfs default) +# - tests can be listed individually: generic/001 xfs/008 xfs/009 +# tests can also be specified by group: -g quick +# +# Exit status: +# 0: success +# 1: usage error +# 2: other runtime error +# 99: argument count error (programming error) +# 100: getopt error (internal error) + +# Alex Elder <elder@dreamhost.com> +# April 13, 2012 + +set -e + +PROGNAME=$(basename $0) + +# Default command line option values +COUNT="1" +EXPUNGE_FILE="" +DO_RANDOMIZE="" # false +FSTYP="xfs" +SCRATCH_DEV="" # MUST BE SPECIFIED +TEST_DEV="" # MUST BE SPECIFIED +TESTS="-g auto" # The "auto" group is supposed to be "known good" + +# print an error message and quit with non-zero status +function err() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "${PROGNAME}: ${FUNCNAME[1]}: $@" >&2 + fi + exit 2 +} + +# routine used to validate argument counts to all shell functions +function arg_count() { + local func + local want + local got + + if [ $# -eq 2 ]; then + func="${FUNCNAME[1]}" # calling function + want=$1 + got=$2 + else + func="${FUNCNAME[0]}" # i.e., arg_count + want=2 + got=$# + fi + [ "${want}" -eq "${got}" ] && return 0 + echo "${PROGNAME}: ${func}: arg count bad (want ${want} got ${got})" >&2 + exit 99 +} + +# validation function for repeat count argument +function count_valid() { + arg_count 1 $# + + test "$1" -gt 0 # 0 is pointless; negative is wrong +} + +# validation function for filesystem type argument +function fs_type_valid() { + arg_count 1 $# + + case "$1" in + xfs|ext4|btrfs) return 0 ;; + *) return 1 ;; + esac +} + +# validation function for device arguments +function device_valid() { + arg_count 1 $# + + # Very simple testing--really should try to be more careful... 
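# (test -b only confirms the argument exists and is a block special file;
# it does not check that the device is idle or safe to reformat.)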
+ test -b "$1" +} + +# validation function for expunge file argument +function expunge_file_valid() { + arg_count 1 $# + + test -s "$1" +} + +# print a usage message and quit +# +# if a message is supplied, print that first, and then exit +# with non-zero status +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "$@" >&2 + fi + + echo "" >&2 + echo "Usage: ${PROGNAME} <options> -- <tests>" >&2 + echo "" >&2 + echo " options:" >&2 + echo " -h or --help" >&2 + echo " show this message" >&2 + echo " -c or --count" >&2 + echo " iteration count (1 or more)" >&2 + echo " -f or --fs-type" >&2 + echo " one of: xfs, ext4, btrfs" >&2 + echo " (default fs-type: xfs)" >&2 + echo " -r or --randomize" >&2 + echo " randomize test order" >&2 + echo " -s or --scratch-dev (REQUIRED)" >&2 + echo " name of device used for scratch filesystem" >&2 + echo " -t or --test-dev (REQUIRED)" >&2 + echo " name of device used for test filesystem" >&2 + echo " -x or --expunge-file" >&2 + echo " name of file with list of tests to skip" >&2 + echo " tests:" >&2 + echo " list of test numbers, e.g.:" >&2 + echo " generic/001 xfs/008 shared/032 btrfs/009" >&2 + echo " or possibly an xfstests test group, e.g.:" >&2 + echo " -g quick" >&2 + echo " (default tests: -g auto)" >&2 + echo "" >&2 + + [ $# -gt 0 ] && exit 1 + + exit 0 # This is used for a --help +} + +# parse command line arguments +function parseargs() { + # Short option flags + SHORT_OPTS="" + SHORT_OPTS="${SHORT_OPTS},h" + SHORT_OPTS="${SHORT_OPTS},c:" + SHORT_OPTS="${SHORT_OPTS},f:" + SHORT_OPTS="${SHORT_OPTS},r" + SHORT_OPTS="${SHORT_OPTS},s:" + SHORT_OPTS="${SHORT_OPTS},t:" + SHORT_OPTS="${SHORT_OPTS},x:" + + # Long option flags + LONG_OPTS="" + LONG_OPTS="${LONG_OPTS},help" + LONG_OPTS="${LONG_OPTS},count:" + LONG_OPTS="${LONG_OPTS},fs-type:" + LONG_OPTS="${LONG_OPTS},randomize" + LONG_OPTS="${LONG_OPTS},scratch-dev:" + LONG_OPTS="${LONG_OPTS},test-dev:" + LONG_OPTS="${LONG_OPTS},expunge-file:" + + TEMP=$(getopt --name "${PROGNAME}" \ + --options "${SHORT_OPTS}" \ + --longoptions "${LONG_OPTS}" \ + -- "$@") + eval set -- "$TEMP" + + while [ "$1" != "--" ]; do + case "$1" in + -h|--help) + usage + ;; + -c|--count) + count_valid "$2" || + usage "invalid count '$2'" + COUNT="$2" + shift + ;; + -f|--fs-type) + fs_type_valid "$2" || + usage "invalid fs_type '$2'" + FSTYP="$2" + shift + ;; + -r|--randomize) + DO_RANDOMIZE="t" + ;; + -s|--scratch-dev) + device_valid "$2" || + usage "invalid scratch-dev '$2'" + SCRATCH_DEV="$2" + shift + ;; + -t|--test-dev) + device_valid "$2" || + usage "invalid test-dev '$2'" + TEST_DEV="$2" + shift + ;; + -x|--expunge-file) + expunge_file_valid "$2" || + usage "invalid expunge-file '$2'" + EXPUNGE_FILE="$2" + shift + ;; + *) + exit 100 # Internal error + ;; + esac + shift + done + shift + + [ -n "${TEST_DEV}" ] || usage "test-dev must be supplied" + [ -n "${SCRATCH_DEV}" ] || usage "scratch-dev must be supplied" + + [ $# -eq 0 ] || TESTS="$@" +} + +################################################################ + +# run mkfs on the given device using the specified filesystem type +function do_mkfs() { + arg_count 1 $# + + local dev="${1}" + local options + + case "${FSTYP}" in + xfs) options="-f" ;; + ext4) options="-F" ;; + btrfs) options="-f" ;; + esac + + "mkfs.${FSTYP}" ${options} "${dev}" || + err "unable to make ${FSTYP} file system on device \"${dev}\"" +} + +# top-level setup routine +function setup() { + arg_count 0 $# + + wget -P "${TESTDIR}" http://download.ceph.com/qa/xfstests.tar.gz + tar zxf 
"${TESTDIR}/xfstests.tar.gz" -C "$(dirname "${XFSTESTS_DIR}")" + mkdir "${TEST_DIR}" + mkdir "${SCRATCH_MNT}" + do_mkfs "${TEST_DEV}" +} + +# top-level (final) cleanup routine +function cleanup() { + arg_count 0 $# + + # ensure teuthology can clean up the logs + chmod -R a+rw "${TESTDIR}/archive" + + findmnt "${TEST_DEV}" && umount "${TEST_DEV}" + [ -d "${SCRATCH_MNT}" ] && rmdir "${SCRATCH_MNT}" + [ -d "${TEST_DIR}" ] && rmdir "${TEST_DIR}" + rm -rf "${XFSTESTS_DIR}" + rm -f "${TESTDIR}/xfstests.tar.gz" +} + +# ################################################################ + +start_date="$(date)" +parseargs "$@" +[ -n "${TESTDIR}" ] || usage "TESTDIR env variable must be set" +[ -d "${TESTDIR}/archive" ] || usage "\$TESTDIR/archive directory must exist" +TESTDIR="$(readlink -e "${TESTDIR}")" +[ -n "${EXPUNGE_FILE}" ] && EXPUNGE_FILE="$(readlink -e "${EXPUNGE_FILE}")" + +XFSTESTS_DIR="/var/lib/xfstests" # hardcoded into dbench binary +TEST_DIR="/mnt/test_dir" +SCRATCH_MNT="/mnt/scratch_mnt" +MKFS_OPTIONS="" +EXT_MOUNT_OPTIONS="-o block_validity,dioread_nolock" + +trap cleanup EXIT ERR HUP INT QUIT +setup + +export TEST_DEV +export TEST_DIR +export SCRATCH_DEV +export SCRATCH_MNT +export FSTYP +export MKFS_OPTIONS +export EXT_MOUNT_OPTIONS + +pushd "${XFSTESTS_DIR}" +for (( i = 1 ; i <= "${COUNT}" ; i++ )); do + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" starting at: $(date)" + + RESULT_BASE="${TESTDIR}/archive/results-${i}" + mkdir "${RESULT_BASE}" + export RESULT_BASE + + EXPUNGE="" + [ -n "${EXPUNGE_FILE}" ] && EXPUNGE="-E ${EXPUNGE_FILE}" + + RANDOMIZE="" + [ -n "${DO_RANDOMIZE}" ] && RANDOMIZE="-r" + + # -T output timestamps + PATH="${PWD}/bin:${PATH}" ./check -T ${RANDOMIZE} ${EXPUNGE} ${TESTS} + findmnt "${TEST_DEV}" && umount "${TEST_DEV}" + + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" complete at: $(date)" +done +popd + +# cleanup is called via the trap call, above + +echo "This xfstests run started at: ${start_date}" +echo "xfstests run completed at: $(date)" +[ "${COUNT}" -gt 1 ] && echo "xfstests run consisted of ${COUNT} iterations" +echo OK diff --git a/qa/run_xfstests_qemu.sh b/qa/run_xfstests_qemu.sh new file mode 100644 index 000000000..651c75401 --- /dev/null +++ b/qa/run_xfstests_qemu.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# TODO switch to run_xfstests.sh (see run_xfstests_krbd.sh) + +set -x + +[ -n "${TESTDIR}" ] || export TESTDIR="/tmp/cephtest" +[ -d "${TESTDIR}" ] || mkdir "${TESTDIR}" + +URL_BASE="https://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa" +SCRIPT="run_xfstests-obsolete.sh" + +cd "${TESTDIR}" + +curl -O "${URL_BASE}/${SCRIPT}" +# mark executable only if the file isn't empty since ./"${SCRIPT}" +# on an empty file would succeed +if [[ -s "${SCRIPT}" ]]; then + chmod +x "${SCRIPT}" +fi + +TEST_DEV="/dev/vdb" +if [[ ! -b "${TEST_DEV}" ]]; then + TEST_DEV="/dev/sdb" +fi +SCRATCH_DEV="/dev/vdc" +if [[ ! -b "${SCRATCH_DEV}" ]]; then + SCRATCH_DEV="/dev/sdc" +fi + +# tests excluded fail in the current testing vm regardless of whether +# rbd is used + +./"${SCRIPT}" -c 1 -f xfs -t "${TEST_DEV}" -s "${SCRATCH_DEV}" \ + 1-7 9-17 19-26 28-49 51-61 63 66-67 69-79 83 85-105 108-110 112-135 \ + 137-170 174-191 193-204 206-217 220-227 230-231 233 235-241 243-249 \ + 252-259 261-262 264-278 281-286 289 +STATUS=$? 
+ +rm -f "${SCRIPT}" + +exit "${STATUS}" diff --git a/qa/runallonce.sh b/qa/runallonce.sh new file mode 100755 index 000000000..bd809fef7 --- /dev/null +++ b/qa/runallonce.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -ex + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +testdir="$1" +[ -n "$2" ] && logdir=$2 || logdir=$1 + +[ ${basedir:0:1} == "." ] && basedir=`pwd`/${basedir:1} + +PATH="$basedir/src:$PATH" + +[ -z "$testdir" ] || [ ! -d "$testdir" ] && echo "specify test dir" && exit 1 +cd $testdir + +for test in `cd $basedir/workunits && find . -executable -type f | $basedir/../src/script/permute` +do + echo "------ running test $test ------" + pwd + [ -d $test ] && rm -r $test + mkdir -p $test + mkdir -p `dirname $logdir/$test.log` + test -e $logdir/$test.log && rm $logdir/$test.log + sh -c "cd $test && $basedir/workunits/$test" 2>&1 | tee $logdir/$test.log +done diff --git a/qa/runoncfuse.sh b/qa/runoncfuse.sh new file mode 100755 index 000000000..7be545356 --- /dev/null +++ b/qa/runoncfuse.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -x + +mkdir -p testspace +ceph-fuse testspace -m $1 + +./runallonce.sh testspace +killall ceph-fuse diff --git a/qa/runonkclient.sh b/qa/runonkclient.sh new file mode 100755 index 000000000..f7e8605fa --- /dev/null +++ b/qa/runonkclient.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -x + +mkdir -p testspace +/bin/mount -t ceph $1 testspace + +./runallonce.sh testspace + +/bin/umount testspace diff --git a/qa/setup-chroot.sh b/qa/setup-chroot.sh new file mode 100755 index 000000000..a6e12f356 --- /dev/null +++ b/qa/setup-chroot.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +die() { + echo ${@} + exit 1 +} + +usage() +{ + cat << EOF +$0: sets up a chroot environment for building the ceph server +usage: +-h Show this message + +-r [install_dir] location of the root filesystem to install to + example: -r /images/sepia/ + +-s [src_dir] location of the directory with the source code + example: -s ./src/ceph +EOF +} + +cleanup() { + umount -l "${INSTALL_DIR}/mnt/tmp" + umount -l "${INSTALL_DIR}/proc" + umount -l "${INSTALL_DIR}/sys" +} + +INSTALL_DIR= +SRC_DIR= +while getopts “hr:s:†OPTION; do + case $OPTION in + h) usage; exit 1 ;; + r) INSTALL_DIR=$OPTARG ;; + s) SRC_DIR=$OPTARG ;; + ?) usage; exit + ;; + esac +done + +[ $EUID -eq 0 ] || die "This script uses chroot, which requires root permissions." + +[ -d "${INSTALL_DIR}" ] || die "No such directory as '${INSTALL_DIR}'. \ +You must specify an install directory with -r" + +[ -d "${SRC_DIR}" ] || die "no such directory as '${SRC_DIR}'. \ +You must specify a source directory with -s" + +readlink -f ${SRC_DIR} || die "readlink failed on ${SRC_DIR}" +ABS_SRC_DIR=`readlink -f ${SRC_DIR}` + +trap cleanup INT TERM EXIT + +mount --bind "${ABS_SRC_DIR}" "${INSTALL_DIR}/mnt/tmp" || die "bind mount failed" +mount -t proc none "${INSTALL_DIR}/proc" || die "mounting proc failed" +mount -t sysfs none "${INSTALL_DIR}/sys" || die "mounting sys failed" + +echo "$0: starting chroot." +echo "cd /mnt/tmp before building" +echo +chroot ${INSTALL_DIR} env HOME=/mnt/tmp /bin/bash + +echo "$0: exiting chroot." + +exit 0 diff --git a/qa/standalone/README b/qa/standalone/README new file mode 100644 index 000000000..3082442cb --- /dev/null +++ b/qa/standalone/README @@ -0,0 +1,23 @@ +qa/standalone +============= + +These scripts run standalone clusters, but not in a normal way. They make +use of functions ceph-helpers.sh to quickly start/stop daemons against +toy clusters in a single directory. 
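A minimal sketch of that pattern, using the helper names defined in ceph-helpers.sh and assuming the build's bin/ directory is on the PATH (see the example block at the top of ceph-helpers.sh for the full version):

    source ../qa/standalone/ceph-helpers.sh
    setup mydir          # wipe and recreate the test directory
    run_mon mydir a      # start one monitor
    run_osd mydir 0      # start one osd
    rados --pool rbd put GROUP /etc/group   # the toy cluster is usable here
    teardown mydir       # stop the daemons and remove mydir
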
+ +They are normally run via teuthology based on qa/suites/rados/standalone/*.yaml. + +You can run them in a git checkout + build directory as well: + + * The qa/run-standalone.sh will run all of them in sequence. This is slow + since there is no parallelism. + + * You can run individual script(s) by specifying the basename or path below + qa/standalone as arguments to qa/run-standalone.sh. + +../qa/run-standalone.sh misc.sh osd/osd-dup.sh + + * Add support for specifying arguments to selected tests by simply adding + list of tests to each argument. + +../qa/run-standalone.sh "test-ceph-helpers.sh test_get_last_scrub_stamp" diff --git a/qa/standalone/c2c/c2c.sh b/qa/standalone/c2c/c2c.sh new file mode 100755 index 000000000..a6969d555 --- /dev/null +++ b/qa/standalone/c2c/c2c.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +set -ex + +function run_perf_c2c() { + # First get some background system info + uname -a > uname.out + lscpu > lscpu.out + cat /proc/cmdline > cmdline.out + timeout -s INT 10 vmstat -w 1 > vmstat.out || true + sudo dmesg >& dmesg.out + cat /proc/cpuinfo > cpuinfo.out + ps axo psr,time,stat,ppid,pid,pcpu,comm > ps.1.out + ps -eafT > ps.2.out + sudo sysctl -a > sysctl.out + + nodecnt=`lscpu|grep "NUMA node(" |awk '{print $3}'` + for ((i=0; i<$nodecnt; i++)) + do + sudo cat /sys/devices/system/node/node${i}/meminfo > meminfo.$i.out + done + sudo more `sudo find /proc -name status` > proc_parent_child_status.out + sudo more /proc/*/numa_maps > numa_maps.out + + # + # Get separate kernel and user perf-c2c stats + # + sudo perf c2c record -a --ldlat=70 --all-user -o perf_c2c_a_all_user.data sleep 5 + sudo perf c2c report --stdio -i perf_c2c_a_all_user.data > perf_c2c_a_all_user.out 2>&1 + sudo perf c2c report --full-symbols --stdio -i perf_c2c_a_all_user.data > perf_c2c_full-sym_a_all_user.out 2>&1 + + sudo perf c2c record --call-graph dwarf -a --ldlat=70 --all-user -o perf_c2c_g_a_all_user.data sleep 5 + sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_user.data > perf_c2c_g_a_all_user.out 2>&1 + + sudo perf c2c record -a --ldlat=70 --all-kernel -o perf_c2c_a_all_kernel.data sleep 4 + sudo perf c2c report --stdio -i perf_c2c_a_all_kernel.data > perf_c2c_a_all_kernel.out 2>&1 + + sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_all_kernel.data sleep 4 + + sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_kernel.data > perf_c2c_g_a_all_kernel.out 2>&1 + + # + # Get combined kernel and user perf-c2c stats + # + sudo perf c2c record -a --ldlat=70 -o perf_c2c_a_both.data sleep 4 + sudo perf c2c report --stdio -i perf_c2c_a_both.data > perf_c2c_a_both.out 2>&1 + + sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_both.data sleep 4 + sudo perf c2c report -g --stdio -i perf_c2c_g_a_both.data > perf_c2c_g_a_both.out 2>&1 + + # + # Get all-user physical addr stats, in case multiple threads or processes are + # accessing shared memory with different vaddrs. + # + sudo perf c2c record --phys-data -a --ldlat=70 --all-user -o perf_c2c_a_all_user_phys_data.data sleep 5 + sudo perf c2c report --stdio -i perf_c2c_a_all_user_phys_data.data > perf_c2c_a_all_user_phys_data.out 2>&1 +} + +function run() { + local dir=$1 + shift + ( + rm -fr $dir + mkdir $dir + cd $dir + ceph_test_c2c --threads $(($(nproc) * 2)) "$@" & + sleep 30 # let it warm up + run_perf_c2c + kill $! 
|| { echo "ceph_test_c2c WAS NOT RUNNING" ; exit 1 ; } + ) || exit 1 +} + +function bench() { + optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) --sharding 2> /dev/null || true) + not_optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) 2> /dev/null || true) + if ! (( $optimized > ( $not_optimized * 2 ) )) ; then + echo "the optimization is expected to be at least x2 faster" + exit 1 + fi +} + +run with-sharding --sharding +run without-sharding +bench diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh new file mode 100755 index 000000000..bf2c91bc0 --- /dev/null +++ b/qa/standalone/ceph-helpers.sh @@ -0,0 +1,2409 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014,2015 Red Hat <contact@redhat.com> +# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Federico Gimenez <fgimenez@coit.es> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +TIMEOUT=300 +WAIT_FOR_CLEAN_TIMEOUT=90 +MAX_TIMEOUT=15 +PG_NUM=4 +TMPDIR=${TMPDIR:-/tmp} +CEPH_BUILD_VIRTUALENV=${TMPDIR} +TESTDIR=${TESTDIR:-${TMPDIR}} + +if type xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif type xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" + exit 1 +fi + +if [ `uname` = FreeBSD ]; then + SED=gsed + AWK=gawk + DIFFCOLOPTS="" + KERNCORE="kern.corefile" +else + SED=sed + AWK=awk + termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/') + if [ -n "$termwidth" -a "$termwidth" != "0" ]; then + termwidth="-W ${termwidth}" + fi + DIFFCOLOPTS="-y $termwidth" + KERNCORE="kernel.core_pattern" +fi + +EXTRA_OPTS="" + +#! @file ceph-helpers.sh +# @brief Toolbox to manage Ceph cluster dedicated to testing +# +# Example use case: +# +# ~~~~~~~~~~~~~~~~{.sh} +# source ceph-helpers.sh +# +# function mytest() { +# # cleanup leftovers and reset mydir +# setup mydir +# # create a cluster with one monitor and three osds +# run_mon mydir a +# run_osd mydir 0 +# run_osd mydir 2 +# run_osd mydir 3 +# # put and get an object +# rados --pool rbd put GROUP /etc/group +# rados --pool rbd get GROUP /tmp/GROUP +# # stop the cluster and cleanup the directory +# teardown mydir +# } +# ~~~~~~~~~~~~~~~~ +# +# The focus is on simplicity and efficiency, in the context of +# functional tests. The output is intentionally very verbose +# and functions return as soon as an error is found. The caller +# is also expected to abort on the first error so that debugging +# can be done by looking at the end of the output. +# +# Each function is documented, implemented and tested independently. +# When modifying a helper, the test and the documentation are +# expected to be updated and it is easier of they are collocated. A +# test for a given function can be run with +# +# ~~~~~~~~~~~~~~~~{.sh} +# ceph-helpers.sh TESTS test_get_osds +# ~~~~~~~~~~~~~~~~ +# +# and all the tests (i.e. 
all functions matching test_*) are run +# with: +# +# ~~~~~~~~~~~~~~~~{.sh} +# ceph-helpers.sh TESTS +# ~~~~~~~~~~~~~~~~ +# +# A test function takes a single argument : the directory dedicated +# to the tests. It is expected to not create any file outside of this +# directory and remove it entirely when it completes successfully. +# + + +function get_asok_dir() { + if [ -n "$CEPH_ASOK_DIR" ]; then + echo "$CEPH_ASOK_DIR" + else + echo ${TMPDIR:-/tmp}/ceph-asok.$$ + fi +} + +function get_asok_path() { + local name=$1 + if [ -n "$name" ]; then + echo $(get_asok_dir)/ceph-$name.asok + else + echo $(get_asok_dir)/\$cluster-\$name.asok + fi +} +## +# Cleanup any leftovers found in **dir** via **teardown** +# and reset **dir** as an empty environment. +# +# @param dir path name of the environment +# @return 0 on success, 1 on error +# +function setup() { + local dir=$1 + teardown $dir || return 1 + mkdir -p $dir + mkdir -p $(get_asok_dir) + if [ $(ulimit -n) -le 1024 ]; then + ulimit -n 4096 || return 1 + fi + if [ -z "$LOCALRUN" ]; then + trap "teardown $dir 1" TERM HUP INT + fi +} + +function test_setup() { + local dir=$dir + setup $dir || return 1 + test -d $dir || return 1 + setup $dir || return 1 + test -d $dir || return 1 + teardown $dir +} + +####################################################################### + +## +# Kill all daemons for which a .pid file exists in **dir** and remove +# **dir**. If the file system in which **dir** is btrfs, delete all +# subvolumes that relate to it. +# +# @param dir path name of the environment +# @param dumplogs pass "1" to dump logs otherwise it will only if cores found +# @return 0 on success, 1 on error +# +function teardown() { + local dir=$1 + local dumplogs=$2 + kill_daemons $dir KILL + if [ `uname` != FreeBSD ] \ + && [ $(stat -f -c '%T' .) == "btrfs" ]; then + __teardown_btrfs $dir + fi + local cores="no" + local pattern="$(sysctl -n $KERNCORE)" + # See if we have apport core handling + if [ "${pattern:0:1}" = "|" ]; then + # TODO: Where can we get the dumps? + # Not sure where the dumps really are so this will look in the CWD + pattern="" + fi + # Local we start with core and teuthology ends with core + if ls $(dirname "$pattern") | grep -q '^core\|core$' ; then + cores="yes" + if [ -n "$LOCALRUN" ]; then + mkdir /tmp/cores.$$ 2> /dev/null || true + for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do + mv $i /tmp/cores.$$ + done + fi + fi + if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then + if [ -n "$LOCALRUN" ]; then + display_logs $dir + else + # Move logs to where Teuthology will archive it + mkdir -p $TESTDIR/archive/log + mv $dir/*.log $TESTDIR/archive/log + fi + fi + rm -fr $dir + rm -rf $(get_asok_dir) + if [ "$cores" = "yes" ]; then + echo "ERROR: Failure due to cores found" + if [ -n "$LOCALRUN" ]; then + echo "Find saved core files in /tmp/cores.$$" + fi + return 1 + fi + return 0 +} + +function __teardown_btrfs() { + local btrfs_base_dir=$1 + local btrfs_root=$(df -P . | tail -1 | $AWK '{print $NF}') + local btrfs_dirs=$(cd $btrfs_base_dir; sudo btrfs subvolume list -t . | $AWK '/^[0-9]/ {print $4}' | grep "$btrfs_base_dir/$btrfs_dir") + for subvolume in $btrfs_dirs; do + sudo btrfs subvolume delete $btrfs_root/$subvolume + done +} + +function test_teardown() { + local dir=$dir + setup $dir || return 1 + teardown $dir || return 1 + ! test -d $dir || return 1 +} + +####################################################################### + +## +# Sends a signal to a single daemon. 
+# This is a helper function for kill_daemons +# +# After the daemon is sent **signal**, its actual termination +# will be verified by sending it signal 0. If the daemon is +# still alive, kill_daemon will pause for a few seconds and +# try again. This will repeat for a fixed number of times +# before kill_daemon returns on failure. The list of +# sleep intervals can be specified as **delays** and defaults +# to: +# +# 0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120 +# +# This sequence is designed to run first a very short sleep time (0.1) +# if the machine is fast enough and the daemon terminates in a fraction of a +# second. The increasing sleep numbers should give plenty of time for +# the daemon to die even on the slowest running machine. If a daemon +# takes more than a few minutes to stop (the sum of all sleep times), +# there probably is no point in waiting more and a number of things +# are likely to go wrong anyway: better give up and return on error. +# +# @param pid the process id to send a signal +# @param send_signal the signal to send +# @param delays sequence of sleep times before failure +# +function kill_daemon() { + local pid=$(cat $1) + local send_signal=$2 + local delays=${3:-0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120} + local exit_code=1 + # In order to try after the last large sleep add 0 at the end so we check + # one last time before dropping out of the loop + for try in $delays 0 ; do + if kill -$send_signal $pid 2> /dev/null ; then + exit_code=1 + else + exit_code=0 + break + fi + send_signal=0 + sleep $try + done; + return $exit_code +} + +function test_kill_daemon() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + name_prefix=osd + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # sending signal 0 won't kill the daemon + # waiting just for one second instead of the default schedule + # allows us to quickly verify what happens when kill fails + # to stop the daemon (i.e. it must return false) + # + ! kill_daemon $pidfile 0 1 || return 1 + # + # killing just the osd and verify the mon still is responsive + # + kill_daemon $pidfile TERM || return 1 + done + + name_prefix=mgr + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # kill the mgr + # + kill_daemon $pidfile TERM || return 1 + done + + name_prefix=mon + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # kill the mon and verify it cannot be reached + # + kill_daemon $pidfile TERM || return 1 + ! timeout 5 ceph status || return 1 + done + + teardown $dir || return 1 +} + +## +# Kill all daemons for which a .pid file exists in **dir**. Each +# daemon is sent a **signal** and kill_daemons waits for it to exit +# during a few minutes. By default all daemons are killed. If a +# **name_prefix** is provided, only the daemons for which a pid +# file is found matching the prefix are killed. See run_osd and +# run_mon for more information about the name conventions for +# the pid files. +# +# Send TERM to all daemons : kill_daemons $dir +# Send KILL to all daemons : kill_daemons $dir KILL +# Send KILL to all osds : kill_daemons $dir KILL osd +# Send KILL to osd 1 : kill_daemons $dir KILL osd.1 +# +# If a daemon is sent the TERM signal and does not terminate +# within a few minutes, it will still be running even after +# kill_daemons returns. 
+# +# If all daemons are kill successfully the function returns 0 +# if at least one daemon remains, this is treated as an +# error and the function return 1. +# +# @param dir path name of the environment +# @param signal name of the first signal (defaults to TERM) +# @param name_prefix only kill match daemons (defaults to all) +# @param delays sequence of sleep times before failure +# @return 0 on success, 1 on error +# +function kill_daemons() { + local trace=$(shopt -q -o xtrace && echo true || echo false) + $trace && shopt -u -o xtrace + local dir=$1 + local signal=${2:-TERM} + local name_prefix=$3 # optional, osd, mon, osd.1 + local delays=$4 #optional timing + local status=0 + local pids="" + + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + run_in_background pids kill_daemon $pidfile $signal $delays + done + + wait_background pids + status=$? + + $trace && shopt -s -o xtrace + return $status +} + +function test_kill_daemons() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + # + # sending signal 0 won't kill the daemon + # waiting just for one second instead of the default schedule + # allows us to quickly verify what happens when kill fails + # to stop the daemon (i.e. it must return false) + # + ! kill_daemons $dir 0 osd 1 || return 1 + # + # killing just the osd and verify the mon still is responsive + # + kill_daemons $dir TERM osd || return 1 + # + # kill the mgr + # + kill_daemons $dir TERM mgr || return 1 + # + # kill the mon and verify it cannot be reached + # + kill_daemons $dir TERM || return 1 + ! timeout 5 ceph status || return 1 + teardown $dir || return 1 +} + +# +# return a random TCP port which is not used yet +# +# please note, there could be racing if we use this function for +# a free port, and then try to bind on this port. +# +function get_unused_port() { + local ip=127.0.0.1 + python3 -c "import socket; s=socket.socket(); s.bind(('$ip', 0)); print(s.getsockname()[1]); s.close()" +} + +####################################################################### + +## +# Run a monitor by the name mon.**id** with data in **dir**/**id**. +# The logs can be found in **dir**/mon.**id**.log and the pid file +# is **dir**/mon.**id**.pid and the admin socket is +# **dir**/**id**/ceph-mon.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-mon --mkfs +# and the ceph-mon daemon. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to run_mon, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# run_mon $dir a # spawn a mon and bind port 7018 +# run_mon $dir a --debug-filestore=20 # spawn with filestore debugging +# +# If mon_initial_members is not set, the default rbd pool is deleted +# and replaced with a replicated pool with less placement groups to +# speed up initialization. If mon_initial_members is set, no attempt +# is made to recreate the rbd pool because it would hang forever, +# waiting for other mons to join. +# +# A **dir**/ceph.conf file is created but not meant to be used by any +# function. 
It is convenient for debugging a failure with: +# +# ceph --conf **dir**/ceph.conf -s +# +# @param dir path name of the environment +# @param id mon identifier +# @param ... can be any option valid for ceph-mon +# @return 0 on success, 1 on error +# +function run_mon() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph-mon \ + --id $id \ + --mkfs \ + --mon-data=$data \ + --run-dir=$dir \ + "$@" || return 1 + + ceph-mon \ + --id $id \ + --osd-failsafe-full-ratio=.99 \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + --mon-data-avail-warn=5 \ + --paxos-propose-interval=0.1 \ + --osd-crush-chooseleaf-type=0 \ + $EXTRA_OPTS \ + --debug-mon 20 \ + --debug-ms 20 \ + --debug-paxos 20 \ + --chdir= \ + --mon-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --mon-cluster-log-file=$dir/log \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + --mon-allow-pool-delete \ + --mon-allow-pool-size-one \ + --osd-pool-default-pg-autoscale-mode off \ + --mon-osd-backfillfull-ratio .99 \ + --mon-warn-on-insecure-global-id-reclaim-allowed=false \ + "$@" || return 1 + + cat > $dir/ceph.conf <<EOF +[global] +fsid = $(get_config mon $id fsid) +mon host = $(get_config mon $id mon_host) +EOF +} + +function test_run_mon() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + ceph mon dump | grep "mon.a" || return 1 + kill_daemons $dir || return 1 + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + ceph osd dump | grep "pool 1 'rbd'" || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"3"}' || return 1 + + ! 
CEPH_ARGS='' ceph status || return 1 + CEPH_ARGS='' ceph --conf $dir/ceph.conf status || return 1 + + kill_daemons $dir || return 1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"1"}' || return 1 + kill_daemons $dir || return 1 + + CEPH_ARGS="$CEPH_ARGS --osd_pool_default_size=2" \ + run_mon $dir a || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"2"}' || return 1 + kill_daemons $dir || return 1 + + teardown $dir || return 1 +} + +function create_rbd_pool() { + ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1 + create_pool rbd $PG_NUM || return 1 + rbd pool init rbd +} + +function create_pool() { + ceph osd pool create "$@" + sleep 1 +} + +function delete_pool() { + local poolname=$1 + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it +} + +####################################################################### + +function run_mgr() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph config set mgr mgr_pool false --force + ceph-mgr \ + --id $id \ + $EXTRA_OPTS \ + --osd-failsafe-full-ratio=.99 \ + --debug-mgr 20 \ + --debug-objecter 20 \ + --debug-ms 20 \ + --debug-paxos 20 \ + --chdir= \ + --mgr-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + --mgr-module-path=$(realpath ${CEPH_ROOT}/src/pybind/mgr) \ + "$@" || return 1 +} + +function run_mds() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph-mds \ + --id $id \ + $EXTRA_OPTS \ + --debug-mds 20 \ + --debug-objecter 20 \ + --debug-ms 20 \ + --chdir= \ + --mds-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + "$@" || return 1 +} + +####################################################################### + +## +# Create (prepare) and run (activate) an osd by the name osd.**id** +# with data in **dir**/**id**. The logs can be found in +# **dir**/osd.**id**.log, the pid file is **dir**/osd.**id**.pid and +# the admin socket is **dir**/**id**/ceph-osd.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-osd. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to run_osd, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# The run_osd function creates the OSD data directory on the **dir**/**id** +# directory and relies on the activate_osd function to run the daemon. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# run_osd $dir 0 # prepare and activate an osd using the monitor listening on 7018 +# +# @param dir path name of the environment +# @param id osd identifier +# @param ... 
can be any option valid for ceph-osd +# @return 0 on success, 1 on error +# +function run_osd() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" --osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --debug-ms=1" + ceph_args+=" --debug-monc=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" --osd-mclock-profile=high_recovery_ops" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + local uuid=`uuidgen` + echo "add osd$id $uuid" + OSD_SECRET=$(ceph-authtool --gen-print-key) + echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json + ceph osd new $uuid -i $osd_data/new.json + rm $osd_data/new.json + ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid + + local key_fn=$osd_data/keyring + cat > $key_fn<<EOF +[osd.$id] +key = $OSD_SECRET +EOF + echo adding osd$id key to auth repository + ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd" + echo start osd.$id + ceph-osd -i $id $ceph_args & + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 + +} + +function run_osd_filestore() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" --osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --debug-ms=1" + ceph_args+=" --debug-monc=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + local uuid=`uuidgen` + echo "add osd$osd $uuid" + OSD_SECRET=$(ceph-authtool --gen-print-key) + echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json + ceph osd new $uuid -i $osd_data/new.json + rm $osd_data/new.json + ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid --osd-objectstore=filestore + + local key_fn=$osd_data/keyring + cat > $key_fn<<EOF +[osd.$osd] +key = $OSD_SECRET +EOF + echo adding osd$id key to auth repository + ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd" + echo start osd.$id + ceph-osd -i $id $ceph_args & + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 + + +} + +function test_run_osd() { + local dir=$1 + + setup $dir || 
return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + + run_osd $dir 1 --osd-max-backfills 20 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.1) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"20"}' || return 1 + + CEPH_ARGS="$CEPH_ARGS --osd-max-backfills 30" run_osd $dir 2 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.2) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"30"}' || return 1 + + teardown $dir || return 1 +} + +####################################################################### + +## +# Shutdown and remove all traces of the osd by the name osd.**id**. +# +# The OSD is shutdown with the TERM signal. It is then removed from +# the auth list, crush map, osd map etc and the files associated with +# it are also removed. +# +# @param dir path name of the environment +# @param id osd identifier +# @return 0 on success, 1 on error +# +function destroy_osd() { + local dir=$1 + local id=$2 + + ceph osd out osd.$id || return 1 + kill_daemons $dir TERM osd.$id || return 1 + ceph osd down osd.$id || return 1 + ceph osd purge osd.$id --yes-i-really-mean-it || return 1 + teardown $dir/$id || return 1 + rm -fr $dir/$id +} + +function test_destroy_osd() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + destroy_osd $dir 0 || return 1 + ! ceph osd dump | grep "osd.$id " || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Run (activate) an osd by the name osd.**id** with data in +# **dir**/**id**. The logs can be found in **dir**/osd.**id**.log, +# the pid file is **dir**/osd.**id**.pid and the admin socket is +# **dir**/**id**/ceph-osd.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-osd. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to activate_osd, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# The activate_osd function expects a valid OSD data directory +# in **dir**/**id**, either just created via run_osd or re-using +# one left by a previous run of ceph-osd. The ceph-osd daemon is +# run directly on the foreground +# +# The activate_osd function blocks until the monitor reports the osd +# up. If it fails to do so within $TIMEOUT seconds, activate_osd +# fails. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# activate_osd $dir 0 # activate an osd using the monitor listening on 7018 +# +# @param dir path name of the environment +# @param id osd identifier +# @param ... 
can be any option valid for ceph-osd +# @return 0 on success, 1 on error +# +function activate_osd() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" --osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" --osd-mclock-profile=high_recovery_ops" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + echo start osd.$id + ceph-osd -i $id $ceph_args & + + [ "$id" = "$(cat $osd_data/whoami)" ] || return 1 + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 +} + +function test_activate_osd() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + + kill_daemons $dir TERM osd || return 1 + + activate_osd $dir 0 --osd-max-backfills 20 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"20"}' || return 1 + + teardown $dir || return 1 +} + +function test_activate_osd_after_mark_down() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + + kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + wait_for_osd down 0 || return 1 + + activate_osd $dir 0 --osd-max-backfills 20 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"20"}' || return 1 + + teardown $dir || return 1 +} + +function test_activate_osd_skip_benchmark() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + # Skip the osd benchmark during first osd bring-up. + run_osd $dir 0 --osd-op-queue=mclock_scheduler \ + --osd-mclock-skip-benchmark=true || return 1 + local max_iops_hdd_def=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd) + local max_iops_ssd_def=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd) + + kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + wait_for_osd down 0 || return 1 + + # Skip the osd benchmark during activation as well. Validate that + # the max osd capacities are left unchanged. 
+ activate_osd $dir 0 --osd-op-queue=mclock_scheduler \ + --osd-mclock-skip-benchmark=true || return 1 + local max_iops_hdd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd) + local max_iops_ssd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd) + + test "$max_iops_hdd_def" = "$max_iops_hdd_after_boot" || return 1 + test "$max_iops_ssd_def" = "$max_iops_ssd_after_boot" || return 1 + + teardown $dir || return 1 +} +####################################################################### + +## +# Wait until the OSD **id** is either up or down, as specified by +# **state**. It fails after $TIMEOUT seconds. +# +# @param state either up or down +# @param id osd identifier +# @return 0 on success, 1 on error +# +function wait_for_osd() { + local state=$1 + local id=$2 + + status=1 + for ((i=0; i < $TIMEOUT; i++)); do + echo $i + if ! ceph osd dump | grep "osd.$id $state"; then + sleep 1 + else + status=0 + break + fi + done + return $status +} + +function test_wait_for_osd() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + wait_for_osd up 0 || return 1 + wait_for_osd up 1 || return 1 + kill_daemons $dir TERM osd.0 || return 1 + wait_for_osd down 0 || return 1 + ( TIMEOUT=1 ; ! wait_for_osd up 0 ) || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Display the list of OSD ids supporting the **objectname** stored in +# **poolname**, as reported by ceph osd map. +# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT white space separated list of OSD ids +# @return 0 on success, 1 on error +# +function get_osds() { + local poolname=$1 + local objectname=$2 + + local osds=$(ceph --format json osd map $poolname $objectname 2>/dev/null | \ + jq '.acting | .[]') + # get rid of the trailing space + echo $osds +} + +function test_get_osds() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + create_rbd_pool || return 1 + get_osds rbd GROUP | grep --quiet '^[0-1] [0-1]$' || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Wait for the monitor to form quorum (optionally, of size N) +# +# @param timeout duration (lower-bound) to wait for quorum to be formed +# @param quorumsize size of quorum to wait for +# @return 0 on success, 1 on error +# +function wait_for_quorum() { + local timeout=$1 + local quorumsize=$2 + + if [[ -z "$timeout" ]]; then + timeout=300 + fi + + if [[ -z "$quorumsize" ]]; then + timeout $timeout ceph quorum_status --format=json >&/dev/null || return 1 + return 0 + fi + + no_quorum=1 + wait_until=$((`date +%s` + $timeout)) + while [[ $(date +%s) -lt $wait_until ]]; do + jqfilter='.quorum | length == '$quorumsize + jqinput="$(timeout $timeout ceph quorum_status --format=json 2>/dev/null)" + res=$(echo $jqinput | jq "$jqfilter") + if [[ "$res" == "true" ]]; then + no_quorum=0 + break + fi + done + return $no_quorum +} + 
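##
# Illustrative sketch only (not part of the helper library): one way the
# helpers above can be combined, patterned on the test_* functions in this
# file. The pool/object names and the quorum size are assumptions made for
# the example.
#
# ~~~~~~~~~~~~~~~~{.sh}
# function example_quorum_and_osds() {
#     local dir=$1
#     setup $dir || return 1
#     run_mon $dir a --osd_pool_default_size=2 || return 1
#     run_mgr $dir x || return 1
#     run_osd $dir 0 || return 1
#     run_osd $dir 1 || return 1
#     wait_for_quorum 300 1 || return 1   # a single mon forms a quorum of 1
#     create_rbd_pool || return 1
#     wait_for_clean || return 1
#     get_osds rbd GROUP | grep --quiet '^[0-1] [0-1]$' || return 1
#     teardown $dir || return 1
# }
# ~~~~~~~~~~~~~~~~
#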
+####################################################################### + +## +# Return the PG of supporting the **objectname** stored in +# **poolname**, as reported by ceph osd map. +# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT a PG +# @return 0 on success, 1 on error +# +function get_pg() { + local poolname=$1 + local objectname=$2 + + ceph --format json osd map $poolname $objectname 2>/dev/null | jq -r '.pgid' +} + +function test_get_pg() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + get_pg rbd GROUP | grep --quiet '^[0-9]\.[0-9a-f][0-9a-f]*$' || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the value of the **config**, obtained via the config get command +# of the admin socket of **daemon**.**id**. +# +# @param daemon mon or osd +# @param id mon or osd ID +# @param config the configuration variable name as found in config_opts.h +# @param STDOUT the config value +# @return 0 on success, 1 on error +# +function get_config() { + local daemon=$1 + local id=$2 + local config=$3 + + CEPH_ARGS='' \ + ceph --format json daemon $(get_asok_path $daemon.$id) \ + config get $config 2> /dev/null | \ + jq -r ".$config" +} + +function test_get_config() { + local dir=$1 + + # override the default config using command line arg and check it + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + test $(get_config mon a osd_pool_default_size) = 1 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_max_scrubs=3 || return 1 + test $(get_config osd 0 osd_max_scrubs) = 3 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Set the **config** to specified **value**, via the config set command +# of the admin socket of **daemon**.**id** +# +# @param daemon mon or osd +# @param id mon or osd ID +# @param config the configuration variable name as found in config_opts.h +# @param value the config value +# @return 0 on success, 1 on error +# +function set_config() { + local daemon=$1 + local id=$2 + local config=$3 + local value=$4 + + test $(env CEPH_ARGS='' ceph --format json daemon $(get_asok_path $daemon.$id) \ + config set $config $value 2> /dev/null | \ + jq 'has("success")') == true +} + +function test_set_config() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + test $(get_config mon a ms_crc_header) = true || return 1 + set_config mon a ms_crc_header false || return 1 + test $(get_config mon a ms_crc_header) = false || return 1 + set_config mon a ms_crc_header true || return 1 + test $(get_config mon a ms_crc_header) = true || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the OSD id of the primary OSD supporting the **objectname** +# stored in **poolname**, as reported by ceph osd map. 
+# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT the primary OSD id +# @return 0 on success, 1 on error +# +function get_primary() { + local poolname=$1 + local objectname=$2 + + ceph --format json osd map $poolname $objectname 2>/dev/null | \ + jq '.acting_primary' +} + +function test_get_primary() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + local osd=0 + run_mgr $dir x || return 1 + run_osd $dir $osd || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + test $(get_primary rbd GROUP) = $osd || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the id of any OSD supporting the **objectname** stored in +# **poolname**, as reported by ceph osd map, except the primary. +# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT the OSD id +# @return 0 on success, 1 on error +# +function get_not_primary() { + local poolname=$1 + local objectname=$2 + + local primary=$(get_primary $poolname $objectname) + ceph --format json osd map $poolname $objectname 2>/dev/null | \ + jq ".acting | map(select (. != $primary)) | .[0]" +} + +function test_get_not_primary() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local primary=$(get_primary rbd GROUP) + local not_primary=$(get_not_primary rbd GROUP) + test $not_primary != $primary || return 1 + test $not_primary = 0 -o $not_primary = 1 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +function _objectstore_tool_nodown() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + ceph-objectstore-tool \ + --data-path $osd_data \ + "$@" || return 1 +} + +function _objectstore_tool_nowait() { + local dir=$1 + shift + local id=$1 + shift + + kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1 + + _objectstore_tool_nodown $dir $id "$@" || return 1 + activate_osd $dir $id $ceph_osd_args >&2 || return 1 +} + +## +# Run ceph-objectstore-tool against the OSD **id** using the data path +# **dir**. The OSD is killed with TERM prior to running +# ceph-objectstore-tool because access to the data path is +# exclusive. The OSD is restarted after the command completes. The +# objectstore_tool returns after all PG are active+clean again. +# +# @param dir the data path of the OSD +# @param id the OSD id +# @param ... 
arguments to ceph-objectstore-tool +# @param STDIN the input of ceph-objectstore-tool +# @param STDOUT the output of ceph-objectstore-tool +# @return 0 on success, 1 on error +# +# The value of $ceph_osd_args will be passed to restarted osds +# +function objectstore_tool() { + local dir=$1 + shift + local id=$1 + shift + + _objectstore_tool_nowait $dir $id "$@" || return 1 + wait_for_clean >&2 +} + +function test_objectstore_tool() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + local osd=0 + run_mgr $dir x || return 1 + run_osd $dir $osd || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + rados --pool rbd put GROUP /etc/group || return 1 + objectstore_tool $dir $osd GROUP get-bytes | \ + diff - /etc/group + ! objectstore_tool $dir $osd NOTEXISTS get-bytes || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Predicate checking if there is an ongoing recovery in the +# cluster. If any of the recovering_{keys,bytes,objects}_per_sec +# counters are reported by ceph status, it means recovery is in +# progress. +# +# @return 0 if recovery in progress, 1 otherwise +# +function get_is_making_recovery_progress() { + local recovery_progress + recovery_progress+=".recovering_keys_per_sec + " + recovery_progress+=".recovering_bytes_per_sec + " + recovery_progress+=".recovering_objects_per_sec" + local progress=$(ceph --format json status 2>/dev/null | \ + jq -r ".pgmap | $recovery_progress") + test "$progress" != null +} + +function test_get_is_making_recovery_progress() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + ! get_is_making_recovery_progress || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the number of active PGs in the cluster. A PG is active if +# ceph pg dump pgs reports it both **active** and **clean** and that +# not **stale**. +# +# @param STDOUT the number of active PGs +# @return 0 on success, 1 on error +# +function get_num_active_clean() { + local expression + expression+="select(contains(\"active\") and contains(\"clean\")) | " + expression+="select(contains(\"stale\") | not)" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + +function test_get_num_active_clean() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local num_active_clean=$(get_num_active_clean) + test "$num_active_clean" = $PG_NUM || return 1 + teardown $dir || return 1 +} + +## +# Return the number of active or peered PGs in the cluster. A PG matches if +# ceph pg dump pgs reports it is either **active** or **peered** and that +# not **stale**. 
+# +# @param STDOUT the number of active PGs +# @return 0 on success, 1 on error +# +function get_num_active_or_peered() { + local expression + expression+="select(contains(\"active\") or contains(\"peered\")) | " + expression+="select(contains(\"stale\") | not)" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + +function test_get_num_active_or_peered() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local num_peered=$(get_num_active_or_peered) + test "$num_peered" = $PG_NUM || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the number of PGs in the cluster, according to +# ceph pg dump pgs. +# +# @param STDOUT the number of PGs +# @return 0 on success, 1 on error +# +function get_num_pgs() { + ceph --format json status 2>/dev/null | jq '.pgmap.num_pgs' +} + +function test_get_num_pgs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local num_pgs=$(get_num_pgs) + test "$num_pgs" -gt 0 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the OSD ids in use by at least one PG in the cluster (either +# in the up or the acting set), according to ceph pg dump pgs. Every +# OSD id shows as many times as they are used in up and acting sets. +# If an OSD id is in both the up and acting set of a given PG, it will +# show twice. +# +# @param STDOUT a sorted list of OSD ids +# @return 0 on success, 1 on error +# +function get_osd_id_used_by_pgs() { + ceph --format json pg dump pgs 2>/dev/null | jq '.pg_stats | .[] | .up[], .acting[]' | sort +} + +function test_get_osd_id_used_by_pgs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local osd_ids=$(get_osd_id_used_by_pgs | uniq) + test "$osd_ids" = "0" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Wait until the OSD **id** shows **count** times in the +# PGs (see get_osd_id_used_by_pgs for more information about +# how OSD ids are counted). +# +# @param id the OSD id +# @param count the number of time it must show in the PGs +# @return 0 on success, 1 on error +# +function wait_osd_id_used_by_pgs() { + local id=$1 + local count=$2 + + status=1 + for ((i=0; i < $TIMEOUT / 5; i++)); do + echo $i + if ! test $(get_osd_id_used_by_pgs | grep -c $id) = $count ; then + sleep 5 + else + status=0 + break + fi + done + return $status +} + +function test_wait_osd_id_used_by_pgs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + wait_osd_id_used_by_pgs 0 8 || return 1 + ! 
TIMEOUT=1 wait_osd_id_used_by_pgs 123 5 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the date and time of the last completed scrub for **pgid**, +# as reported by ceph pg dump pgs. Note that a repair also sets this +# date. +# +# @param pgid the id of the PG +# @param STDOUT the date and time of the last scrub +# @return 0 on success, 1 on error +# +function get_last_scrub_stamp() { + local pgid=$1 + local sname=${2:-last_scrub_stamp} + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function test_get_last_scrub_stamp() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + stamp=$(get_last_scrub_stamp 1.0) + test -n "$stamp" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Predicate checking if the cluster is clean, i.e. all of its PGs are +# in a clean state (see get_num_active_clean for a definition). +# +# @return 0 if the cluster is clean, 1 otherwise +# +function is_clean() { + num_pgs=$(get_num_pgs) + test $num_pgs != 0 || return 1 + test $(get_num_active_clean) = $num_pgs || return 1 +} + +function test_is_clean() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + is_clean || return 1 + teardown $dir || return 1 +} + +####################################################################### + +calc() { $AWK "BEGIN{print $*}"; } + +## +# Return a list of numbers that are increasingly larger and whose +# total is **timeout** seconds. It can be used to have short sleep +# delay while waiting for an event on a fast machine. But if running +# very slowly the larger delays avoid stressing the machine even +# further or spamming the logs. +# +# @param timeout sum of all delays, in seconds +# @return a list of sleep delays +# +function get_timeout_delays() { + local trace=$(shopt -q -o xtrace && echo true || echo false) + $trace && shopt -u -o xtrace + local timeout=$1 + local first_step=${2:-1} + local max_timeout=${3:-$MAX_TIMEOUT} + + local i + local total="0" + i=$first_step + while test "$(calc $total + $i \<= $timeout)" = "1"; do + echo -n "$(calc $i) " + total=$(calc $total + $i) + i=$(calc $i \* 2) + if [ $max_timeout -gt 0 ]; then + # Did we reach max timeout ? 
+ if [ ${i%.*} -eq ${max_timeout%.*} ] && [ ${i#*.} \> ${max_timeout#*.} ] || [ ${i%.*} -gt ${max_timeout%.*} ]; then + # Yes, so let's cap the max wait time to max + i=$max_timeout + fi + fi + done + if test "$(calc $total \< $timeout)" = "1"; then + echo -n "$(calc $timeout - $total) " + fi + $trace && shopt -s -o xtrace +} + +function test_get_timeout_delays() { + test "$(get_timeout_delays 1)" = "1 " || return 1 + test "$(get_timeout_delays 5)" = "1 2 2 " || return 1 + test "$(get_timeout_delays 6)" = "1 2 3 " || return 1 + test "$(get_timeout_delays 7)" = "1 2 4 " || return 1 + test "$(get_timeout_delays 8)" = "1 2 4 1 " || return 1 + test "$(get_timeout_delays 1 .1)" = "0.1 0.2 0.4 0.3 " || return 1 + test "$(get_timeout_delays 1.5 .1)" = "0.1 0.2 0.4 0.8 " || return 1 + test "$(get_timeout_delays 5 .1)" = "0.1 0.2 0.4 0.8 1.6 1.9 " || return 1 + test "$(get_timeout_delays 6 .1)" = "0.1 0.2 0.4 0.8 1.6 2.9 " || return 1 + test "$(get_timeout_delays 6.3 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 " || return 1 + test "$(get_timeout_delays 20 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 7.3 " || return 1 + test "$(get_timeout_delays 300 .1 0)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 12.8 25.6 51.2 102.4 95.3 " || return 1 + test "$(get_timeout_delays 300 .1 10)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 7.3 " || return 1 +} + +####################################################################### + +## +# Wait until the cluster becomes clean or if it does not make progress +# for $WAIT_FOR_CLEAN_TIMEOUT seconds. +# Progress is measured either via the **get_is_making_recovery_progress** +# predicate or if the number of clean PGs changes (as returned by get_num_active_clean) +# +# @return 0 if the cluster is clean, 1 otherwise +# +function wait_for_clean() { + local cmd=$1 + local num_active_clean=-1 + local cur_active_clean + local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + # Comparing get_num_active_clean & get_num_pgs is used to determine + # if the cluster is clean. That's almost an inline of is_clean() to + # get more performance by avoiding multiple calls of get_num_active_clean. + cur_active_clean=$(get_num_active_clean) + test $cur_active_clean = $(get_num_pgs) && break + if test $cur_active_clean != $num_active_clean ; then + loop=0 + num_active_clean=$cur_active_clean + elif get_is_making_recovery_progress ; then + loop=0 + elif (( $loop >= ${#delays[*]} )) ; then + ceph report + return 1 + fi + # eval is a no-op if cmd is empty + eval $cmd + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + +function test_wait_for_clean() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_osd $dir 0 || return 1 + run_mgr $dir x || return 1 + create_rbd_pool || return 1 + ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1 + run_osd $dir 1 || return 1 + wait_for_clean || return 1 + teardown $dir || return 1 +} + +## +# Wait until the cluster becomes peered or if it does not make progress +# for $WAIT_FOR_CLEAN_TIMEOUT seconds. 
+# Progress is measured either via the **get_is_making_recovery_progress** +# predicate or if the number of peered PGs changes (as returned by get_num_active_or_peered) +# +# @return 0 if the cluster is peered, 1 otherwise +# +function wait_for_peered() { + local cmd=$1 + local num_peered=-1 + local cur_peered + local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + # Comparing get_num_active_or_peered & get_num_pgs is used to determine + # if the cluster is peered. That's almost an inline of is_clean() to + # get more performance by avoiding multiple calls of get_num_active_or_peered. + cur_peered=$(get_num_active_or_peered) + test $cur_peered = $(get_num_pgs) && break + if test $cur_peered != $num_peered ; then + loop=0 + num_peered=$cur_peered + elif get_is_making_recovery_progress ; then + loop=0 + elif (( $loop >= ${#delays[*]} )) ; then + ceph report + return 1 + fi + # eval is a no-op if cmd is empty + eval $cmd + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + +function test_wait_for_peered() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_osd $dir 0 || return 1 + run_mgr $dir x || return 1 + create_rbd_pool || return 1 + ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1 + run_osd $dir 1 || return 1 + wait_for_peered || return 1 + teardown $dir || return 1 +} + + +####################################################################### + +## +# Wait until the given health condition has disappeared from the cluster. +# $TIMEOUT is used as the default timeout. +# +# @param string to grep for in health detail +# @return 0 if the health condition is gone, +# 1 if it still remains after $TIMEOUT seconds. +# +function wait_for_health_gone() { + local grepstr=$1 + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + while ceph health detail | grep "$grepstr" ; do + if (( $loop >= ${#delays[*]} )) ; then + ceph health detail + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done +} + +## +# Wait until the cluster health matches the condition passed as argument, +# retrying for up to $TIMEOUT seconds. +# +# @param string to grep for in health detail +# @return 0 if the cluster health matches request, 1 otherwise +# +function wait_for_health() { + local grepstr=$1 + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + while ! ceph health detail | grep "$grepstr" ; do + if (( $loop >= ${#delays[*]} )) ; then + ceph health detail + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done +} + +## +# Wait until the cluster becomes HEALTH_OK again, or fail if it does not +# within $TIMEOUT seconds. +# +# @return 0 if the cluster is HEALTHY, 1 otherwise +# +function wait_for_health_ok() { + wait_for_health "HEALTH_OK" || return 1 +} + +function test_wait_for_health_ok() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1 + run_mgr $dir x --mon_pg_warn_min_per_osd=0 || return 1 + # start osd_pool_default_size OSDs + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + # expect TOO_FEW_OSDS warning + ! 
TIMEOUT=1 wait_for_health_ok || return 1 + # resurrect all OSDs + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + wait_for_health_ok || return 1 + teardown $dir || return 1 +} + + +####################################################################### + +## +# Run repair on **pgid** and wait until it completes. The repair +# function will fail if repair does not complete within $TIMEOUT +# seconds. +# +# @param pgid the id of the PG +# @return 0 on success, 1 on error +# +function repair() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph pg repair $pgid + wait_for_scrub $pgid "$last_scrub" +} + +function test_repair() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + repair 1.0 || return 1 + kill_daemons $dir KILL osd || return 1 + ! TIMEOUT=1 repair 1.0 || return 1 + teardown $dir || return 1 +} +####################################################################### + +## +# Run scrub on **pgid** and wait until it completes. The pg_scrub +# function will fail if repair does not complete within $TIMEOUT +# seconds. The pg_scrub is complete whenever the +# **get_last_scrub_stamp** function reports a timestamp different from +# the one stored before starting the scrub. +# +# @param pgid the id of the PG +# @return 0 on success, 1 on error +# +function pg_scrub() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph pg scrub $pgid + wait_for_scrub $pgid "$last_scrub" +} + +function pg_deep_scrub() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid last_deep_scrub_stamp) + ceph pg deep-scrub $pgid + wait_for_scrub $pgid "$last_scrub" last_deep_scrub_stamp +} + +function test_pg_scrub() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + pg_scrub 1.0 || return 1 + kill_daemons $dir KILL osd || return 1 + ! TIMEOUT=1 pg_scrub 1.0 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Run the *command* and expect it to fail (i.e. return a non zero status). +# The output (stderr and stdout) is stored in a temporary file in *dir* +# and is expected to contain the string *expected*. +# +# Return 0 if the command failed and the string was found. Otherwise +# return 1 and cat the full output of the command on stderr for debug. +# +# @param dir temporary directory to store the output +# @param expected string to look for in the output +# @param command ... the command and its arguments +# @return 0 on success, 1 on error +# + +function expect_failure() { + local dir=$1 + shift + local expected="$1" + shift + local success + + if "$@" > $dir/out 2>&1 ; then + success=true + else + success=false + fi + + if $success || ! grep --quiet "$expected" $dir/out ; then + cat $dir/out >&2 + return 1 + else + return 0 + fi +} + +function test_expect_failure() { + local dir=$1 + + setup $dir || return 1 + expect_failure $dir FAIL bash -c 'echo FAIL ; exit 1' || return 1 + # the command did not fail + ! 
expect_failure $dir FAIL bash -c 'echo FAIL ; exit 0' > $dir/out || return 1 + grep --quiet FAIL $dir/out || return 1 + # the command failed but the output does not contain the expected string + ! expect_failure $dir FAIL bash -c 'echo UNEXPECTED ; exit 1' > $dir/out || return 1 + ! grep --quiet FAIL $dir/out || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Given the *last_scrub*, wait for scrub to happen on **pgid**. It +# will fail if scrub does not complete within $TIMEOUT seconds. The +# repair is complete whenever the **get_last_scrub_stamp** function +# reports a timestamp different from the one given in argument. +# +# @param pgid the id of the PG +# @param last_scrub timestamp of the last scrub for *pgid* +# @return 0 on success, 1 on error +# +function wait_for_scrub() { + local pgid=$1 + local last_scrub="$2" + local sname=${3:-last_scrub_stamp} + + for ((i=0; i < $TIMEOUT; i++)); do + if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then + return 0 + fi + sleep 1 + done + return 1 +} + +function test_wait_for_scrub() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local pgid=1.0 + ceph pg repair $pgid + local last_scrub=$(get_last_scrub_stamp $pgid) + wait_for_scrub $pgid "$last_scrub" || return 1 + kill_daemons $dir KILL osd || return 1 + last_scrub=$(get_last_scrub_stamp $pgid) + ! TIMEOUT=1 wait_for_scrub $pgid "$last_scrub" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return 0 if the erasure code *plugin* is available, 1 otherwise. +# +# @param plugin erasure code plugin +# @return 0 on success, 1 on error +# + +function erasure_code_plugin_exists() { + local plugin=$1 + local status + local grepstr + local s + case `uname` in + FreeBSD) grepstr="Cannot open.*$plugin" ;; + *) grepstr="$plugin.*No such file" ;; + esac + + s=$(ceph osd erasure-code-profile set TESTPROFILE plugin=$plugin 2>&1) + local status=$? + if [ $status -eq 0 ]; then + ceph osd erasure-code-profile rm TESTPROFILE + elif ! echo $s | grep --quiet "$grepstr" ; then + status=1 + # display why the string was rejected. + echo $s + fi + return $status +} + +function test_erasure_code_plugin_exists() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + erasure_code_plugin_exists jerasure || return 1 + ! erasure_code_plugin_exists FAKE || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Display all log files from **dir** on stdout. +# +# @param dir directory in which all data is stored +# + +function display_logs() { + local dir=$1 + + find $dir -maxdepth 1 -name '*.log' | \ + while read file ; do + echo "======================= $file" + cat $file + done +} + +function test_display_logs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + kill_daemons $dir || return 1 + display_logs $dir > $dir/log.out + grep --quiet mon.a.log $dir/log.out || return 1 + teardown $dir || return 1 +} + +####################################################################### +## +# Spawn a command in background and save the pid in the variable name +# passed in argument. 
To make the output reading easier, the output is
+# prepended with the process id.
+#
+# Example:
+#   pids1=""
+#   run_in_background pids1 bash -c 'sleep 1; exit 1'
+#
+# @param pid_variable the variable name (not value) where the pids will be stored
+# @param ... the command to execute
+# @return nothing useful directly; the pids collected in pid_variable are meant
+#         to be passed to **wait_background**
+#
+function run_in_background() {
+    local pid_variable=$1
+    shift
+    # Execute the command and prepend the output with its pid.
+    # We make sure to return the exit status of the command and not the one of sed.
+    ("$@" |& sed 's/^/'$BASHPID': /'; return "${PIPESTATUS[0]}") >&2 &
+    eval "$pid_variable+=\" $!\""
+}
+
+function save_stdout {
+    local out="$1"
+    shift
+    "$@" > "$out"
+}
+
+function test_run_in_background() {
+    local pids
+    run_in_background pids sleep 1
+    run_in_background pids sleep 1
+    test $(echo $pids | wc -w) = 2 || return 1
+    wait $pids || return 1
+}
+
+#######################################################################
+##
+# Wait for pids running in background to complete.
+# This function is usually used after a **run_in_background** call.
+# Example:
+#   pids1=""
+#   run_in_background pids1 bash -c 'sleep 1; exit 1'
+#   wait_background pids1
+#
+# @param pids the variable name that contains the active PIDS; set to empty at the end of the function
+# @return 1 if at least one process exited in error, 0 otherwise
+#
+function wait_background() {
+    # We extract the PIDS from the variable name
+    pids=${!1}
+
+    return_code=0
+    for pid in $pids; do
+        if ! wait $pid; then
+            # If one process failed then return 1
+            return_code=1
+        fi
+    done
+
+    # We empty the variable, reporting that all processes ended
+    eval "$1=''"
+
+    return $return_code
+}
+
+
+function test_wait_background() {
+    local pids=""
+    run_in_background pids bash -c "sleep 1; exit 1"
+    run_in_background pids bash -c "sleep 2; exit 0"
+    wait_background pids
+    if [ $? -ne 1 ]; then return 1; fi
+
+    run_in_background pids bash -c "sleep 1; exit 0"
+    run_in_background pids bash -c "sleep 2; exit 0"
+    wait_background pids
+    if [ $? -ne 0 ]; then return 1; fi
+
+    if [ !
-z "$pids" ]; then return 1; fi +} + +function flush_pg_stats() +{ + local timeout=${1:-$TIMEOUT} + + ids=`ceph osd ls` + seqs='' + for osd in $ids; do + seq=`ceph tell osd.$osd flush_pg_stats` + if test -z "$seq" + then + continue + fi + seqs="$seqs $osd-$seq" + done + + for s in $seqs; do + osd=`echo $s | cut -d - -f 1` + seq=`echo $s | cut -d - -f 2` + echo "waiting osd.$osd seq $seq" + while test $(ceph osd last-stat-seq $osd) -lt $seq; do + sleep 1 + if [ $((timeout--)) -eq 0 ]; then + return 1 + fi + done + done +} + +function test_flush_pg_stats() +{ + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + rados -p rbd put obj /etc/group + flush_pg_stats || return 1 + local jq_filter='.pools | .[] | select(.name == "rbd") | .stats' + stored=`ceph df detail --format=json | jq "$jq_filter.stored"` + stored_raw=`ceph df detail --format=json | jq "$jq_filter.stored_raw"` + test $stored -gt 0 || return 1 + test $stored == $stored_raw || return 1 + teardown $dir +} + +######################################################################## +## +# Get the current op scheduler enabled on an osd by reading the +# osd_op_queue config option +# +# Example: +# get_op_scheduler $osdid +# +# @param id the id of the OSD +# @return the name of the op scheduler enabled for the OSD +# +function get_op_scheduler() { + local id=$1 + + get_config osd $id osd_op_queue +} + +function test_get_op_scheduler() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=wpq || return 1 + test $(get_op_scheduler 0) = "wpq" || return 1 + + run_osd $dir 1 --osd_op_queue=mclock_scheduler || return 1 + test $(get_op_scheduler 1) = "mclock_scheduler" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Call the **run** function (which must be defined by the caller) with +# the **dir** argument followed by the caller argument list. +# +# If the **run** function returns on error, all logs found in **dir** +# are displayed for diagnostic purposes. +# +# **teardown** function is called when the **run** function returns +# (on success or on error), to cleanup leftovers. The CEPH_CONF is set +# to /dev/null and CEPH_ARGS is unset so that the tests are protected from +# external interferences. +# +# It is the responsibility of the **run** function to call the +# **setup** function to prepare the test environment (create a temporary +# directory etc.). +# +# The shell is required (via PS4) to display the function and line +# number whenever a statement is executed to help debugging. +# +# @param dir directory in which all data is stored +# @param ... 
arguments passed transparently to **run**
+# @return 0 on success, 1 on error
+#
+function main() {
+    local dir=td/$1
+    shift
+
+    shopt -s -o xtrace
+    PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '
+
+    export PATH=.:$PATH # make sure program from sources are preferred
+    export PYTHONWARNINGS=ignore
+    export CEPH_CONF=/dev/null
+    unset CEPH_ARGS
+
+    local code
+    if run $dir "$@" ; then
+        code=0
+    else
+        code=1
+    fi
+    teardown $dir $code || return 1
+    return $code
+}
+
+#######################################################################
+
+function run_tests() {
+    shopt -s -o xtrace
+    PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '
+
+    export PATH=.:$PATH # make sure program from sources are preferred
+
+    export CEPH_MON="127.0.0.1:7109" # git grep '\<7109\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+=" --fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    export CEPH_CONF=/dev/null
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(test_[0-9a-z_]*\) .*/\1/p')}
+    local dir=td/ceph-helpers
+
+    for func in $funcs ; do
+        if ! $func $dir; then
+            teardown $dir 1
+            return 1
+        fi
+    done
+}
+
+if test "$1" = TESTS ; then
+    shift
+    run_tests "$@"
+    exit $?
+fi
+
+# NOTE:
+# jq only supports --exit-status|-e from version 1.4 onwards, which makes
+# returning on error waaaay prettier and straightforward.
+# However, the current automated upstream build is running with v1.3,
+# which has no idea what -e is. Hence the convoluted error checking we
+# need. Sad.
+# The next time someone changes this code, please check if v1.4 is now
+# a thing, and, if so, please change these to use -e. Thanks.
+
+# jq '.all.supported | select([.[] == "foo"] | any)'
+function jq_success() {
+    input="$1"
+    filter="$2"
+    expects="\"$3\""
+
+    in_escaped=$(printf %s "$input" | sed "s/'/'\\\\''/g")
+    filter_escaped=$(printf %s "$filter" | sed "s/'/'\\\\''/g")
+
+    ret=$(echo "$in_escaped" | jq "$filter_escaped")
+    if [[ "$ret" == "true" ]]; then
+        return 0
+    elif [[ -n "$expects" ]]; then
+        if [[ "$ret" == "$expects" ]]; then
+            return 0
+        fi
+    fi
+    return 1
+}
+
+function inject_eio() {
+    local pooltype=$1
+    shift
+    local which=$1
+    shift
+    local poolname=$1
+    shift
+    local objname=$1
+    shift
+    local dir=$1
+    shift
+    local shard_id=$1
+    shift
+
+    local -a initial_osds=($(get_osds $poolname $objname))
+    local osd_id=${initial_osds[$shard_id]}
+    if [ "$pooltype" != "ec" ]; then
+        shard_id=""
+    fi
+    type=$(cat $dir/$osd_id/type)
+    set_config osd $osd_id ${type}_debug_inject_read_err true || return 1
+    local loop=0
+    while ( CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
+             inject${which}err $poolname $objname $shard_id | grep -q Invalid ); do
+        loop=$(expr $loop + 1)
+        if [ $loop = "10" ]; then
+            return 1
+        fi
+        sleep 1
+    done
+}
+
+function multidiff() {
+    if !
diff $@ ; then
+        if [ "$DIFFCOLOPTS" = "" ]; then
+            return 1
+        fi
+        diff $DIFFCOLOPTS $@
+    fi
+}
+
+function create_ec_pool() {
+    local pool_name=$1
+    shift
+    local allow_overwrites=$1
+    shift
+
+    ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" || return 1
+
+    create_pool "$pool_name" 1 1 erasure myprofile || return 1
+
+    if [ "$allow_overwrites" = "true" ]; then
+        ceph osd pool set "$pool_name" allow_ec_overwrites true || return 1
+    fi
+
+    wait_for_clean || return 1
+    return 0
+}
+
+# Local Variables:
+# compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config"
+# End:
diff --git a/qa/standalone/crush/crush-choose-args.sh b/qa/standalone/crush/crush-choose-args.sh
new file mode 100755
index 000000000..ee548db12
--- /dev/null
+++ b/qa/standalone/crush/crush-choose-args.sh
@@ -0,0 +1,243 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7131" # git grep '\<7131\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    CEPH_ARGS+="--crush-location=root=default,host=HOST "
+    CEPH_ARGS+="--osd-crush-initial-weight=3 "
+    #
+    # Disable the device auto class feature for now.
+    # The device class is non-deterministic and will
+    # crash the crushmap comparison below.
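+    # Note (added for clarity): --osd-crush-initial-weight=3 above gives every
+    # new OSD a fixed CRUSH weight of 3, which is what the expected crushmaps
+    # (crush-choose-args-expected-one-more-3.txt) and the weight sums checked
+    # by the tests below assume.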
+ # + CEPH_ARGS+="--osd-class-update-on-start=false " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_choose_args_update() { + # + # adding a weighted OSD updates the weight up to the top + # + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd set-require-min-compat-client luminous + ceph osd getcrushmap > $dir/map || return 1 + crushtool -d $dir/map -o $dir/map.txt || return 1 + sed -i -e '/end crush map/d' $dir/map.txt + cat >> $dir/map.txt <<EOF +# choose_args +choose_args 0 { + { + bucket_id -1 + weight_set [ + [ 2.00000 ] + [ 2.00000 ] + ] + ids [ -10 ] + } + { + bucket_id -2 + weight_set [ + [ 2.00000 ] + [ 2.00000 ] + ] + ids [ -20 ] + } +} + +# end crush map +EOF + crushtool -c $dir/map.txt -o $dir/map-new || return 1 + ceph osd setcrushmap -i $dir/map-new || return 1 + ceph osd crush tree + + run_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-more || return 1 + crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1 + cat $dir/map-one-more.txt + diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-3.txt || return 1 + + destroy_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-less || return 1 + crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1 + diff -u $dir/map-one-less.txt $dir/map.txt || return 1 +} + +function TEST_no_update_weight_set() { + # + # adding a zero weight OSD does not update the weight set at all + # + local dir=$1 + + ORIG_CEPH_ARGS="$CEPH_ARGS" + CEPH_ARGS+="--osd-crush-update-weight-set=false " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd set-require-min-compat-client luminous + ceph osd crush tree + ceph osd getcrushmap > $dir/map || return 1 + crushtool -d $dir/map -o $dir/map.txt || return 1 + sed -i -e '/end crush map/d' $dir/map.txt + cat >> $dir/map.txt <<EOF +# choose_args +choose_args 0 { + { + bucket_id -1 + weight_set [ + [ 2.00000 ] + [ 1.00000 ] + ] + ids [ -10 ] + } + { + bucket_id -2 + weight_set [ + [ 2.00000 ] + [ 1.00000 ] + ] + ids [ -20 ] + } +} + +# end crush map +EOF + crushtool -c $dir/map.txt -o $dir/map-new || return 1 + ceph osd setcrushmap -i $dir/map-new || return 1 + ceph osd crush tree + + + run_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-more || return 1 + crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1 + cat $dir/map-one-more.txt + diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-0.txt || return 1 + + destroy_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-less || return 1 + crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1 + diff -u $dir/map-one-less.txt $dir/map.txt || return 1 + + CEPH_ARGS="$ORIG_CEPH_ARGS" +} + +function TEST_reweight() { + # reweight and reweight-compat behave appropriately + local dir=$1 + + ORIG_CEPH_ARGS="$CEPH_ARGS" + CEPH_ARGS+="--osd-crush-update-weight-set=false " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + + ceph osd crush weight-set create-compat || return 1 + ceph osd crush tree + + ceph osd crush weight-set reweight-compat osd.0 2 || return 1 + ceph osd crush 
tree + ceph osd crush tree | grep host | grep '6.00000 5.00000' || return 1 + + run_osd $dir 2 || return 1 + ceph osd crush tree + ceph osd crush tree | grep host | grep '9.00000 5.00000' || return 1 + + ceph osd crush reweight osd.2 4 + ceph osd crush tree + ceph osd crush tree | grep host | grep '10.00000 5.00000' || return 1 + + ceph osd crush weight-set reweight-compat osd.2 4 + ceph osd crush tree + ceph osd crush tree | grep host | grep '10.00000 9.00000' || return 1 +} + +function TEST_move_bucket() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + + ceph osd crush weight-set create-compat || return 1 + ceph osd crush weight-set reweight-compat osd.0 2 || return 1 + ceph osd crush weight-set reweight-compat osd.1 2 || return 1 + ceph osd crush tree + ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1 + + # moving a bucket adjusts the weights + ceph osd crush add-bucket RACK rack root=default || return 1 + ceph osd crush move HOST rack=RACK || return 1 + ceph osd crush tree + ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1 + ceph osd crush tree | grep RACK | grep '6.00000 4.00000' || return 1 + + # weight-set reweight adjusts containing buckets + ceph osd crush weight-set reweight-compat osd.0 1 || return 1 + ceph osd crush tree + ceph osd crush tree | grep HOST | grep '6.00000 3.00000' || return 1 + ceph osd crush tree | grep RACK | grep '6.00000 3.00000' || return 1 + + # moving a leaf resets its weight-set to the canonical weight... + ceph config set mon osd_crush_update_weight_set true || return 1 + ceph osd crush add-bucket FOO host root=default || return 1 + ceph osd crush move osd.0 host=FOO || return 1 + ceph osd crush tree + ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1 + ceph osd crush tree | grep HOST | grep '3.00000 2.00000' || return 1 + ceph osd crush tree | grep RACK | grep '3.00000 2.00000' || return 1 + + # ...or to zero. + ceph config set mon osd_crush_update_weight_set false || return 1 + ceph osd crush move osd.1 host=FOO || return 1 + ceph osd crush tree + ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1 + ceph osd crush tree | grep osd.1 | grep '3.00000 0' || return 1 + ceph osd crush tree | grep FOO | grep '6.00000 3.00000' || return 1 +} + +main crush-choose-args "$@" + +# Local Variables: +# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-choose-args.sh" +# End: diff --git a/qa/standalone/crush/crush-classes.sh b/qa/standalone/crush/crush-classes.sh new file mode 100755 index 000000000..558aabe6d --- /dev/null +++ b/qa/standalone/crush/crush-classes.sh @@ -0,0 +1,265 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7130" # git grep '\<7130\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # + # Disable auto-class, so we can inject device class manually below + # + CEPH_ARGS+="--osd-class-update-on-start=false " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function add_something() { + local dir=$1 + local obj=${2:-SOMETHING} + + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool rbd put $obj $dir/ORIGINAL || return 1 +} + +function get_osds_up() { + local poolname=$1 + local objectname=$2 + + local osds=$(ceph --format xml osd map $poolname $objectname 2>/dev/null | \ + $XMLSTARLET sel -t -m "//up/osd" -v . -o ' ') + # get rid of the trailing space + echo $osds +} + +function TEST_reweight_vs_classes() { + local dir=$1 + + # CrushWrapper::update_item (and ceph osd crush set) must rebuild the shadow + # tree too. https://tracker.ceph.com/issues/48065 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd crush set-device-class ssd osd.0 || return 1 + ceph osd crush class ls-osd ssd | grep 0 || return 1 + ceph osd crush set-device-class ssd osd.1 || return 1 + ceph osd crush class ls-osd ssd | grep 1 || return 1 + + ceph osd crush reweight osd.0 1 + + h=`hostname -s` + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 65536 + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 65536 + + ceph osd crush set 0 2 host=$h + + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 131072 + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 131072 +} + +function TEST_classes() { + local dir=$1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + + test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1 + add_something $dir SOMETHING || return 1 + + # + # osd.0 has class ssd and the rule is modified + # to only take ssd devices. + # + ceph osd getcrushmap > $dir/map || return 1 + crushtool -d $dir/map -o $dir/map.txt || return 1 + ${SED} -i \ + -e '/device 0 osd.0/s/$/ class ssd/' \ + -e '/step take default/s/$/ class ssd/' \ + $dir/map.txt || return 1 + crushtool -c $dir/map.txt -o $dir/map-new || return 1 + ceph osd setcrushmap -i $dir/map-new || return 1 + + # + # There can only be one mapping since there only is + # one device with ssd class. + # + ok=false + for delay in 2 4 8 16 32 64 128 256 ; do + if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0" ; then + ok=true + break + fi + sleep $delay + ceph osd dump # for debugging purposes + ceph pg dump # for debugging purposes + done + $ok || return 1 + # + # Writing keeps working because the pool is min_size 1 by + # default. + # + add_something $dir SOMETHING_ELSE || return 1 + + # + # Sanity check that the rule indeed has ssd + # generated bucket with a name including ~ssd. 
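+    # Note (added for clarity): device classes are implemented with shadow
+    # CRUSH trees; for each class the buckets are duplicated under names of
+    # the form <bucket>~<class> (e.g. default~ssd), which is what the grep
+    # below relies on.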
+ # + ceph osd crush dump | grep -q '~ssd' || return 1 +} + +function TEST_set_device_class() { + local dir=$1 + + TEST_classes $dir || return 1 + + ceph osd crush set-device-class ssd osd.0 || return 1 + ceph osd crush class ls-osd ssd | grep 0 || return 1 + ceph osd crush set-device-class ssd osd.1 || return 1 + ceph osd crush class ls-osd ssd | grep 1 || return 1 + ceph osd crush set-device-class ssd 0 1 || return 1 # should be idempotent + + ok=false + for delay in 2 4 8 16 32 64 128 256 ; do + if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0 1" ; then + ok=true + break + fi + sleep $delay + ceph osd crush dump + ceph osd dump # for debugging purposes + ceph pg dump # for debugging purposes + done + $ok || return 1 +} + +function TEST_mon_classes() { + local dir=$1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + + test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1 + add_something $dir SOMETHING || return 1 + + # test create and remove class + ceph osd crush class create CLASS || return 1 + ceph osd crush class create CLASS || return 1 # idempotent + ceph osd crush class ls | grep CLASS || return 1 + ceph osd crush class rename CLASS TEMP || return 1 + ceph osd crush class ls | grep TEMP || return 1 + ceph osd crush class rename TEMP CLASS || return 1 + ceph osd crush class ls | grep CLASS || return 1 + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd crush-device-class=CLASS || return 1 + expect_failure $dir EBUSY ceph osd crush class rm CLASS || return 1 + ceph osd erasure-code-profile rm myprofile || return 1 + ceph osd crush class rm CLASS || return 1 + ceph osd crush class rm CLASS || return 1 # test idempotence + + # test rm-device-class + ceph osd crush set-device-class aaa osd.0 || return 1 + ceph osd tree | grep -q 'aaa' || return 1 + ceph osd crush dump | grep -q '~aaa' || return 1 + ceph osd crush tree --show-shadow | grep -q '~aaa' || return 1 + ceph osd crush set-device-class bbb osd.1 || return 1 + ceph osd tree | grep -q 'bbb' || return 1 + ceph osd crush dump | grep -q '~bbb' || return 1 + ceph osd crush tree --show-shadow | grep -q '~bbb' || return 1 + ceph osd crush set-device-class ccc osd.2 || return 1 + ceph osd tree | grep -q 'ccc' || return 1 + ceph osd crush dump | grep -q '~ccc' || return 1 + ceph osd crush tree --show-shadow | grep -q '~ccc' || return 1 + ceph osd crush rm-device-class 0 || return 1 + ceph osd tree | grep -q 'aaa' && return 1 + ceph osd crush class ls | grep -q 'aaa' && return 1 # class 'aaa' should gone + ceph osd crush rm-device-class 1 || return 1 + ceph osd tree | grep -q 'bbb' && return 1 + ceph osd crush class ls | grep -q 'bbb' && return 1 # class 'bbb' should gone + ceph osd crush rm-device-class 2 || return 1 + ceph osd tree | grep -q 'ccc' && return 1 + ceph osd crush class ls | grep -q 'ccc' && return 1 # class 'ccc' should gone + ceph osd crush set-device-class asdf all || return 1 + ceph osd tree | grep -q 'asdf' || return 1 + ceph osd crush dump | grep -q '~asdf' || return 1 + ceph osd crush tree --show-shadow | grep -q '~asdf' || return 1 + ceph osd crush rule create-replicated asdf-rule default host asdf || return 1 + ceph osd crush rm-device-class all || return 1 + ceph osd tree | grep -q 'asdf' && return 1 + ceph osd crush class ls | grep -q 'asdf' || return 1 # still referenced by asdf-rule + + ceph osd crush set-device-class abc osd.2 || return 1 + 
ceph osd crush move osd.2 root=foo rack=foo-rack host=foo-host || return 1 + out=`ceph osd tree |awk '$1 == 2 && $2 == "abc" {print $0}'` + if [ "$out" == "" ]; then + return 1 + fi + + # verify 'crush move' too + ceph osd crush dump | grep -q 'foo~abc' || return 1 + ceph osd crush tree --show-shadow | grep -q 'foo~abc' || return 1 + ceph osd crush dump | grep -q 'foo-rack~abc' || return 1 + ceph osd crush tree --show-shadow | grep -q 'foo-rack~abc' || return 1 + ceph osd crush dump | grep -q 'foo-host~abc' || return 1 + ceph osd crush tree --show-shadow | grep -q 'foo-host~abc' || return 1 + ceph osd crush rm-device-class osd.2 || return 1 + # restore class, so we can continue to test create-replicated + ceph osd crush set-device-class abc osd.2 || return 1 + + ceph osd crush rule create-replicated foo-rule foo host abc || return 1 + + # test set-device-class implicitly change class + ceph osd crush set-device-class hdd osd.0 || return 1 + expect_failure $dir EBUSY ceph osd crush set-device-class nvme osd.0 || return 1 + + # test class rename + ceph osd crush rm-device-class all || return 1 + ceph osd crush set-device-class class_1 all || return 1 + ceph osd crush class ls | grep 'class_1' || return 1 + ceph osd crush tree --show-shadow | grep 'class_1' || return 1 + ceph osd crush rule create-replicated class_1_rule default host class_1 || return 1 + ceph osd crush class rename class_1 class_2 + ceph osd crush class rename class_1 class_2 # idempotent + ceph osd crush class ls | grep 'class_1' && return 1 + ceph osd crush tree --show-shadow | grep 'class_1' && return 1 + ceph osd crush class ls | grep 'class_2' || return 1 + ceph osd crush tree --show-shadow | grep 'class_2' || return 1 +} + +main crush-classes "$@" + +# Local Variables: +# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-classes.sh" +# End: diff --git a/qa/standalone/erasure-code/test-erasure-code-plugins.sh b/qa/standalone/erasure-code/test-erasure-code-plugins.sh new file mode 100755 index 000000000..b5648d472 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-code-plugins.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +set -x + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +arch=$(uname -m) + +case $arch in + i[[3456]]86*|x86_64*|amd64*) + legacy_jerasure_plugins=(jerasure_generic jerasure_sse3 jerasure_sse4) + legacy_shec_plugins=(shec_generic shec_sse3 shec_sse4) + plugins=(jerasure shec lrc isa) + ;; + aarch64*|arm*) + legacy_jerasure_plugins=(jerasure_generic jerasure_neon) + legacy_shec_plugins=(shec_generic shec_neon) + plugins=(jerasure shec lrc) + ;; + *) + echo "unsupported platform ${arch}." 
+ return 1 + ;; +esac + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:17110" # git grep '\<17110\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +function TEST_preload_warning() { + local dir=$1 + + for plugin in ${legacy_jerasure_plugins[*]} ${legacy_shec_plugins[*]}; do + setup $dir || return 1 + run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1 + run_mgr $dir x || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/mon.a.log || return 1 + grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/osd.0.log || return 1 + teardown $dir || return 1 + done + return 0 +} + +function TEST_preload_no_warning() { + local dir=$1 + + for plugin in ${plugins[*]}; do + setup $dir || return 1 + run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1 + run_mgr $dir x || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/mon.a.log || return 1 + ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/osd.0.log || return 1 + teardown $dir || return 1 + done + + return 0 +} + +function TEST_preload_no_warning_default() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + ! grep "WARNING: osd_erasure_code_plugins" $dir/mon.a.log || return 1 + ! 
grep "WARNING: osd_erasure_code_plugins" $dir/osd.0.log || return 1 + teardown $dir || return 1 + + return 0 +} + +function TEST_ec_profile_warning() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 2) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + for plugin in ${legacy_jerasure_plugins[*]}; do + ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd technique=reed_sol_van plugin=${plugin} || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1 + done + + for plugin in ${legacy_shec_plugins[*]}; do + ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd plugin=${plugin} || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1 + done + + teardown $dir || return 1 +} + +main test-erasure-code-plugins "$@" diff --git a/qa/standalone/erasure-code/test-erasure-code.sh b/qa/standalone/erasure-code/test-erasure-code.sh new file mode 100755 index 000000000..b93151233 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-code.sh @@ -0,0 +1,337 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7101" # git grep '\<7101\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --mon-osd-prime-pg-temp=false" + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1 + for id in $(seq 0 10) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1 + create_erasure_coded_pool ecpool || return 1 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done + + delete_pool ecpool || return 1 + teardown $dir || return 1 +} + +function create_erasure_coded_pool() { + local poolname=$1 + + ceph osd erasure-code-profile set myprofile \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure myprofile \ + || return 1 + wait_for_clean || return 1 +} + +function rados_put_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + # + # take out an OSD used to store the object and + # check the object can still be retrieved, which implies + # recovery + # + local -a initial_osds=($(get_osds $poolname $objname)) + local last=$((${#initial_osds[@]} - 1)) + ceph osd out ${initial_osds[$last]} || return 1 + + # give the osdmap up to 5 seconds to refresh + sleep 5 + ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1 + + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + ceph osd in ${initial_osds[$last]} || return 1 + + rm $dir/ORIGINAL +} + +function rados_osds_out_in() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + + for marker in FFFF GGGG HHHH IIII ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + # + # take out two OSDs used to store the object, wait for the cluster + # to be clean (i.e. all PG are clean and active) again which + # implies the PG have been moved to use the remaining OSDs. Check + # the object can still be retrieved. + # + wait_for_clean || return 1 + local osds_list=$(get_osds $poolname $objname) + local -a osds=($osds_list) + for osd in 0 1 ; do + ceph osd out ${osds[$osd]} || return 1 + done + wait_for_clean || return 1 + # + # verify the object is no longer mapped to the osds that are out + # + for osd in 0 1 ; do + ! 
get_osds $poolname $objname | grep '\<'${osds[$osd]}'\>' || return 1 + done + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + # + # bring the osds back in, , wait for the cluster + # to be clean (i.e. all PG are clean and active) again which + # implies the PG go back to using the same osds as before + # + for osd in 0 1 ; do + ceph osd in ${osds[$osd]} || return 1 + done + wait_for_clean || return 1 + test "$osds_list" = "$(get_osds $poolname $objname)" || return 1 + rm $dir/ORIGINAL +} + +function TEST_rados_put_get_lrc_advanced() { + local dir=$1 + local poolname=pool-lrc-a + local profile=profile-lrc-a + + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + mapping=DD_ \ + crush-steps='[ [ "chooseleaf", "osd", 0 ] ]' \ + layers='[ [ "DDc", "" ] ]' || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_lrc_kml() { + local dir=$1 + local poolname=pool-lrc + local profile=profile-lrc + + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + k=4 m=2 l=3 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_isa() { + if ! erasure_code_plugin_exists isa ; then + echo "SKIP because plugin isa has not been built" + return 0 + fi + local dir=$1 + local poolname=pool-isa + + ceph osd erasure-code-profile set profile-isa \ + plugin=isa \ + crush-failure-domain=osd || return 1 + create_pool $poolname 1 1 erasure profile-isa \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname +} + +function TEST_rados_put_get_jerasure() { + local dir=$1 + + rados_put_get $dir ecpool || return 1 + + local poolname=pool-jerasure + local profile=profile-jerasure + + ceph osd erasure-code-profile set $profile \ + plugin=jerasure \ + k=4 m=2 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + rados_osds_out_in $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_shec() { + local dir=$1 + + local poolname=pool-shec + local profile=profile-shec + + ceph osd erasure-code-profile set $profile \ + plugin=shec \ + k=2 m=1 c=1 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_alignment_constraints() { + local payload=ABC + echo "$payload" > $dir/ORIGINAL + # + # Verify that the rados command enforces alignment constraints + # imposed by the stripe width + # See http://tracker.ceph.com/issues/8622 + # + local stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit) + eval local $(ceph osd erasure-code-profile get myprofile | grep k=) + local block_size=$((stripe_unit * k - 1)) + dd if=/dev/zero of=$dir/ORIGINAL bs=$block_size count=2 + rados --block-size=$block_size \ + --pool ecpool put UNALIGNED $dir/ORIGINAL || return 1 + rm $dir/ORIGINAL +} + +function chunk_size() { + echo $(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit) +} + +# +# By default an object will 
be split in two (k=2) with the first part +# of the object in the first OSD of the up set and the second part in +# the next OSD in the up set. This layout is defined by the mapping +# parameter and this function helps verify that the first and second +# part of the object are located in the OSD where they should be. +# +function verify_chunk_mapping() { + local dir=$1 + local poolname=$2 + local first=$3 + local second=$4 + + local payload=$(printf '%*s' $(chunk_size) FIRST$poolname ; printf '%*s' $(chunk_size) SECOND$poolname) + echo -n "$payload" > $dir/ORIGINAL + + rados --pool $poolname put SOMETHING$poolname $dir/ORIGINAL || return 1 + rados --pool $poolname get SOMETHING$poolname $dir/COPY || return 1 + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + for (( i = 0; i < ${#osds[@]}; i++ )) ; do + ceph daemon osd.${osds[$i]} flush_journal + done + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + objectstore_tool $dir ${osds[$first]} SOMETHING$poolname get-bytes | grep --quiet FIRST$poolname || return 1 + objectstore_tool $dir ${osds[$second]} SOMETHING$poolname get-bytes | grep --quiet SECOND$poolname || return 1 +} + +function TEST_chunk_mapping() { + local dir=$1 + + # + # mapping=DD_ is the default: + # first OSD (i.e. 0) in the up set has the first part of the object + # second OSD (i.e. 1) in the up set has the second part of the object + # + verify_chunk_mapping $dir ecpool 0 1 || return 1 + + ceph osd erasure-code-profile set remap-profile \ + plugin=lrc \ + layers='[ [ "cDD", "" ] ]' \ + mapping='_DD' \ + crush-steps='[ [ "choose", "osd", 0 ] ]' || return 1 + ceph osd erasure-code-profile get remap-profile + create_pool remap-pool 12 12 erasure remap-profile \ + || return 1 + + # + # mapping=_DD + # second OSD (i.e. 1) in the up set has the first part of the object + # third OSD (i.e. 2) in the up set has the second part of the object + # + verify_chunk_mapping $dir remap-pool 1 2 || return 1 + + delete_pool remap-pool + ceph osd erasure-code-profile rm remap-profile +} + +main test-erasure-code "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-code.sh" +# End: diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh new file mode 100755 index 000000000..42c538eb9 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-eio.sh @@ -0,0 +1,700 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# +# Author: Kefu Chai <kchai@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + create_pool rbd 4 || return 1 + + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function setup_osds() { + local count=$1 + shift + + for id in $(seq 0 $(expr $count - 1)) ; do + run_osd $dir $id || return 1 + done + + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1 +} + +function get_state() { + local pgid=$1 + local sname=state + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function create_erasure_coded_pool() { + local poolname=$1 + shift + local k=$1 + shift + local m=$1 + shift + + ceph osd erasure-code-profile set myprofile \ + plugin=jerasure \ + k=$k m=$m \ + crush-failure-domain=osd || return 1 + create_pool $poolname 1 1 erasure myprofile \ + || return 1 + wait_for_clean || return 1 +} + +function delete_erasure_coded_pool() { + local poolname=$1 + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it + ceph osd erasure-code-profile rm myprofile +} + +function rados_put() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 +} + +function rados_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + local expect=${4:-ok} + + # + # Expect a failure to get object + # + if [ $expect = "fail" ]; + then + ! 
rados --pool $poolname get $objname $dir/COPY + return + fi + # + # get an object, compare with $dir/ORIGINAL + # + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY +} + + +function inject_remove() { + local pooltype=$1 + shift + local which=$1 + shift + local poolname=$1 + shift + local objname=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + + local -a initial_osds=($(get_osds $poolname $objname)) + local osd_id=${initial_osds[$shard_id]} + objectstore_tool $dir $osd_id $objname remove || return 1 +} + +# Test with an inject error +function rados_put_get_data() { + local inject=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + local arg=$1 + + # inject eio to speificied shard + # + local poolname=pool-jerasure + local objname=obj-$inject-$$-$shard_id + rados_put $dir $poolname $objname || return 1 + inject_$inject ec data $poolname $objname $dir $shard_id || return 1 + rados_get $dir $poolname $objname || return 1 + + if [ "$arg" = "recovery" ]; + then + # + # take out the last OSD used to store the object, + # bring it back, and check for clean PGs which means + # recovery didn't crash the primary. + # + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd out ${last_osd} || return 1 + ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1 + ceph osd in ${last_osd} || return 1 + activate_osd $dir ${last_osd} || return 1 + wait_for_clean || return 1 + # Won't check for eio on get here -- recovery above might have fixed it + else + shard_id=$(expr $shard_id + 1) + inject_$inject ec data $poolname $objname $dir $shard_id || return 1 + rados_get $dir $poolname $objname fail || return 1 + rm $dir/ORIGINAL + fi + +} + +# Change the size of speificied shard +# +function set_size() { + local objname=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + local bytes=$1 + shift + local mode=${1} + + local poolname=pool-jerasure + local -a initial_osds=($(get_osds $poolname $objname)) + local osd_id=${initial_osds[$shard_id]} + ceph osd set noout + if [ "$mode" = "add" ]; + then + objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1 + dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT + elif [ "$bytes" = "0" ]; + then + touch $dir/CORRUPT + else + dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT + fi + objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1 + rm -f $dir/CORRUPT + ceph osd unset noout +} + +function rados_get_data_bad_size() { + local dir=$1 + shift + local shard_id=$1 + shift + local bytes=$1 + shift + local mode=${1:-set} + + local poolname=pool-jerasure + local objname=obj-size-$$-$shard_id-$bytes + rados_put $dir $poolname $objname || return 1 + + # Change the size of speificied shard + # + set_size $objname $dir $shard_id $bytes $mode || return 1 + + rados_get $dir $poolname $objname || return 1 + + # Leave objname and modify another shard + shard_id=$(expr $shard_id + 1) + set_size $objname $dir $shard_id $bytes $mode || return 1 + rados_get $dir $poolname $objname fail || return 1 + rm $dir/ORIGINAL +} + +# +# These two test cases try to validate the following behavior: +# For object on EC pool, if there is one shard having read error ( +# either primary or replica), client can still read object. +# +# If 2 shards have read errors the client will get an error. 
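+#
+# As a concrete illustration (assuming the k=2 m=1 profile created by the
+# tests below): the object is striped across 2 data shards plus 1 coding
+# shard, and any k=2 of those 3 shards are enough to serve a read. One
+# injected EIO therefore still leaves k readable shards, while two injected
+# EIOs on the same object leave only one, and the client read fails.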
+# +function TEST_rados_get_subread_eio_shard_0() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on primary OSD (0) and replica OSD (1) + local shard_id=0 + rados_put_get_data eio $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_subread_eio_shard_1() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_put_get_data eio $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +# We don't remove the object from the primary because +# that just causes it to appear to be missing + +function TEST_rados_get_subread_missing() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject remove into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_put_get_data remove $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +# +# +# These two test cases try to validate that following behavior: +# For object on EC pool, if there is one shard which an incorrect +# size this will cause an internal read error, client can still read object. +# +# If 2 shards have incorrect size the client will get an error. +# +function TEST_rados_get_bad_size_shard_0() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # Set incorrect size into primary OSD (0) and replica OSD (1) + local shard_id=0 + rados_get_data_bad_size $dir $shard_id 10 || return 1 + rados_get_data_bad_size $dir $shard_id 0 || return 1 + rados_get_data_bad_size $dir $shard_id 256 add || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_bad_size_shard_1() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # Set incorrect size into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_get_data_bad_size $dir $shard_id 10 || return 1 + rados_get_data_bad_size $dir $shard_id 0 || return 1 + rados_get_data_bad_size $dir $shard_id 256 add || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_with_subreadall_eio_shard_0() { + local dir=$1 + local shard_id=0 + + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on primary OSD (0) + rados_put_get_data eio $dir $shard_id recovery || return 1 + + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_with_subreadall_eio_shard_1() { + local dir=$1 + local shard_id=1 + + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on replica OSD (1) + rados_put_get_data eio $dir $shard_id recovery || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery the object attr read error +function TEST_ec_object_attr_read_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + local primary_osd=$(get_primary $poolname $objname) + # Kill primary OSD + kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1 + + # Write data + rados_put $dir $poolname $objname || return 1 + + # Inject eio, shard 1 is the one 
read attr + inject_eio ec mdata $poolname $objname $dir 1 || return 1 + + # Restart OSD + activate_osd $dir ${primary_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery the first k copies aren't all available +function TEST_ec_single_recovery_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname $objname || return 1 + inject_eio ec data $poolname $objname $dir 0 || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery when repeated reads are needed due to EIO +function TEST_ec_recovery_multiple_errors() { + local dir=$1 + local objname=myobject + + setup_osds 9 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 4 4 || return 1 + + rados_put $dir $poolname $objname || return 1 + inject_eio ec data $poolname $objname $dir 0 || return 1 + # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets + # tried as well. Make that fail to test multiple-EIO handling. + inject_eio ec data $poolname $objname $dir 3 || return 1 + inject_eio ec data $poolname $objname $dir 4 || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery when there's only one shard to recover, but multiple +# objects recovering in one RecoveryOp +function TEST_ec_recovery_multiple_objects() { + local dir=$1 + local objname=myobject + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + setup_osds 7 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname test1 + rados_put $dir $poolname test2 + rados_put $dir $poolname test3 + + ceph osd out 0 || return 1 + + # Cluster should recover these objects all at once + wait_for_clean || return 1 + + rados_get $dir $poolname test1 + rados_get $dir $poolname test2 + rados_get $dir $poolname test3 + + delete_erasure_coded_pool $poolname +} + +# test multi-object recovery when the one missing shard gets EIO +function TEST_ec_recovery_multiple_objects_eio() { + local dir=$1 + local objname=myobject + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + setup_osds 7 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname test1 + rados_put $dir $poolname test2 + rados_put $dir $poolname test3 + + # can't read from this shard anymore + inject_eio ec data $poolname $objname $dir 0 || return 1 + ceph osd out 0 || 
return 1 + + # Cluster should recover these objects all at once + wait_for_clean || return 1 + + rados_get $dir $poolname test1 + rados_get $dir $poolname test2 + rados_get $dir $poolname test3 + + delete_erasure_coded_pool $poolname +} + +# Test backfill with unfound object +function TEST_ec_backfill_unfound() { + local dir=$1 + local objname=myobject + local lastobj=300 + # Must be between 1 and $lastobj + local testobj=obj250 + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 5 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + local primary=$(get_primary $poolname $objname) + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 240); do + state=$(get_state 2.0) + echo $state | grep backfill_unfound + if [ "$?" = "0" ]; then + break + fi + echo $state + sleep 1 + done + + ceph pg dump pgs + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + sleep 5 + + ceph pg dump pgs + ceph pg 2.0 list_unfound + ceph pg 2.0 query + + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound") + test "$check" == "true" || return 1 + + eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].status) + test "$check" == "osd is down" || return 1 + + eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].osd) + test "$check" == "2(4)" || return 1 + + activate_osd $dir ${last_osd} || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! 
rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_erasure_coded_pool $poolname +} + +# Test recovery with unfound object +function TEST_ec_recovery_unfound() { + local dir=$1 + local objname=myobject + local lastobj=100 + # Must be between 1 and $lastobj + local testobj=obj75 + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 5 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 100); do + state=$(get_state 2.0) + echo $state | grep recovery_unfound + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound + ceph pg 2.0 query + + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound") + test "$check" == "true" || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".might_have_unfound | length") + test $check == 0 || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_erasure_coded_pool $poolname +} + +main test-erasure-eio "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh" +# End: diff --git a/qa/standalone/mgr/balancer.sh b/qa/standalone/mgr/balancer.sh new file mode 100755 index 000000000..2d7b2f35d --- /dev/null +++ b/qa/standalone/mgr/balancer.sh @@ -0,0 +1,223 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +TEST_POOL1=test1 +TEST_POOL2=test2 + +function TEST_balancer() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_pool $TEST_POOL1 8 + create_pool $TEST_POOL2 8 + + wait_for_clean || return 1 + + ceph pg dump pgs + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "upmap" || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + + ceph balancer ls || return 1 + PLANS=$(ceph balancer ls) + test "$PLANS" = "[]" || return 1 + ceph balancer eval || return 1 + EVAL="$(ceph balancer eval)" + test "$EVAL" = "current cluster score 0.000000 (lower is better)" + ceph balancer eval-verbose || return 1 + + ceph balancer pool add $TEST_POOL1 || return 1 + ceph balancer pool add $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + eval POOL=$(ceph balancer pool ls | jq 'sort | .[0]') + test "$POOL" = "$TEST_POOL1" || return 1 + eval POOL=$(ceph balancer pool ls | jq 'sort | .[1]') + test "$POOL" = "$TEST_POOL2" || return 1 + ceph balancer pool rm $TEST_POOL1 || return 1 + ceph balancer pool rm $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + ceph balancer pool add $TEST_POOL1 || return 1 + + ceph balancer mode crush-compat || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "crush-compat" || return 1 + ceph balancer off || return 1 + ! ceph balancer optimize plan_crush $TEST_POOL1 || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + sleep 2 + + ceph balancer reset || return 1 + + ceph balancer mode upmap || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "upmap" || return 1 + ! 
ceph balancer optimize plan_upmap $TEST_POOL || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Unable to find further optimization, or pool(s) pg_num is decreasing, or distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + + teardown $dir || return 1 +} + +function TEST_balancer2() { + local dir=$1 + TEST_PGS1=118 + TEST_PGS2=132 + TOTAL_PGS=$(expr $TEST_PGS1 + $TEST_PGS2) + OSDS=5 + DEFAULT_REPLICAS=3 + # Integer average of PGS per OSD (70.8), so each OSD >= this + FINAL_PER_OSD1=$(expr \( $TEST_PGS1 \* $DEFAULT_REPLICAS \) / $OSDS) + # Integer average of PGS per OSD (150) + FINAL_PER_OSD2=$(expr \( \( $TEST_PGS1 + $TEST_PGS2 \) \* $DEFAULT_REPLICAS \) / $OSDS) + + CEPH_ARGS+="--osd_pool_default_pg_autoscale_mode=off " + CEPH_ARGS+="--debug_osd=20 " + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $i || return 1 + done + + ceph osd set-require-min-compat-client luminous + ceph config set mgr mgr/balancer/upmap_max_deviation 1 + ceph balancer mode upmap || return 1 + ceph balancer on || return 1 + ceph config set mgr mgr/balancer/sleep_interval 5 + + create_pool $TEST_POOL1 $TEST_PGS1 + + wait_for_clean || return 1 + + # Wait up to 2 minutes + OK=no + for i in $(seq 1 25) + do + sleep 5 + if grep -q "Optimization plan is almost perfect" $dir/mgr.x.log + then + OK=yes + break + fi + done + test $OK = "yes" || return 1 + # Plan is found, but PGs still need to move + sleep 10 + wait_for_clean || return 1 + ceph osd df + + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + + create_pool $TEST_POOL2 $TEST_PGS2 + + # Wait up to 2 minutes + OK=no + for i in $(seq 1 25) + do + sleep 5 + COUNT=$(grep "Optimization plan is almost perfect" $dir/mgr.x.log | wc -l) + if test $COUNT = "2" + then + OK=yes + break + fi + done + test $OK = "yes" || return 1 + # Plan is found, but PGs still need to move + sleep 10 + wait_for_clean || return 1 + ceph osd df + + # We should be with plus or minus 2 of FINAL_PER_OSD2 + # This is because here each pool is balanced independently + MIN=$(expr $FINAL_PER_OSD2 - 2) + MAX=$(expr $FINAL_PER_OSD2 + 2) + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + + 
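# Illustrative sketch (not part of the original test): the five per-OSD
# range checks above could equivalently be written as a loop over the OSD
# indices, using the same commands already used in this test:
#
#   for n in $(seq 0 $(expr $OSDS - 1)); do
#       PGS=$(ceph osd df --format=json-pretty | jq ".nodes[$n].pgs")
#       test $PGS -ge $MIN -a $PGS -le $MAX || return 1
#   done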
teardown $dir || return 1 +} + +main balancer "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh balancer.sh" +# End: diff --git a/qa/standalone/misc/mclock-config.sh b/qa/standalone/misc/mclock-config.sh new file mode 100755 index 000000000..59f002584 --- /dev/null +++ b/qa/standalone/misc/mclock-config.sh @@ -0,0 +1,467 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat <contact@redhat.com> +# +# Author: Sridhar Seshasayee <sseshasa@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-mclock 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_profile_builtin_to_custom() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify the default mclock profile on the OSD + local mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + mclock_profile=$(eval echo $mclock_profile) + test "$mclock_profile" = "high_recovery_ops" || return 1 + + # Change the mclock profile to 'custom' + ceph tell osd.0 config set osd_mclock_profile custom || return 1 + + # Verify that the mclock profile is set to 'custom' on the OSDs + mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_profile | jq .osd_mclock_profile) + mclock_profile=$(eval echo $mclock_profile) + test "$mclock_profile" = "custom" || return 1 + + # Change a mclock config param and confirm the change + local client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + echo "client_res = $client_res" + local client_res_new=$(echo "$client_res + 0.1" | bc -l) + echo "client_res_new = $client_res_new" + ceph config set osd.0 osd_mclock_scheduler_client_res \ + $client_res_new || return 1 + + # Check value in config monitor db + local res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + + teardown $dir || return 1 +} + +function 
TEST_profile_custom_to_builtin() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify the default mclock profile on the OSD + local def_mclock_profile + def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$def_mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + local orig_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + orig_mclock_profile=$(eval echo $orig_mclock_profile) + test $orig_mclock_profile = "high_recovery_ops" || return 1 + + # Change the mclock profile to 'custom' + ceph tell osd.0 config set osd_mclock_profile custom || return 1 + + # Verify that the mclock profile is set to 'custom' on the OSDs + local mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile | \ + jq .osd_mclock_profile) + mclock_profile=$(eval echo $mclock_profile) + test $mclock_profile = "custom" || return 1 + + # Save the original client reservations allocated to the OSDs + local client_res + client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + echo "Original client_res for osd.0 = $client_res" + + # Change a mclock config param and confirm the change + local client_res_new=$(echo "$client_res + 0.1" | bc -l) + echo "client_res_new = $client_res_new" + ceph config set osd osd_mclock_scheduler_client_res \ + $client_res_new || return 1 + # Check value in config monitor db + local res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + + # Switch the mclock profile back to the original built-in profile. + # The config subsystem prevents the overwrite of the changed QoS config + # option above i.e. osd_mclock_scheduler_client_res. This fact is verified + # before proceeding to remove the entry from the config monitor db. After + # the config entry is removed, the original value for the config option is + # restored and is verified. 
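# Illustrative sketch (not part of the original test): the verification
# steps below all follow the same pattern of reading the in-memory value
# over the admin socket and comparing it numerically with bc, e.g.:
#
#   val=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
#             config get osd_mclock_scheduler_client_res | \
#             jq .osd_mclock_scheduler_client_res | bc)
#   if (( $(echo "$val != $expected" | bc -l) )); then return 1; fi
#
# where $expected stands for whichever value the particular step checks
# (client_res_new while the custom setting is pinned, client_res after the
# monitor db entry is removed).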
+ ceph tell osd.0 config set osd_mclock_profile $orig_mclock_profile || return 1 + # Verify that the mclock profile is set back to the original on the OSD + eval mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile | \ + jq .osd_mclock_profile) + #mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$mclock_profile" = "$orig_mclock_profile" || return 1 + + # Verify that the new value is still in effect + # Check value in config monitor db + local res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + + # Remove the changed QoS config option from monitor db + ceph config rm osd osd_mclock_scheduler_client_res || return 1 + + sleep 5 # Allow time for change to take effect + + # Verify that the original values are now restored + # Check value in config monitor db + res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != 0.0" | bc -l) )); then + return 1 + fi + + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res" | bc -l) )); then + return 1 + fi + + teardown $dir || return 1 +} + +function TEST_recovery_limit_adjustment_mclock() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + local recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_recovery_max_active) + # Get default value + echo "$recoveries" | grep --quiet 'osd_recovery_max_active' || return 1 + + # Change the recovery limit without setting + # osd_mclock_override_recovery_settings option. Verify that the recovery + # limit is retained at its default value. + ceph config set osd.0 osd_recovery_max_active 10 || return 1 + sleep 2 # Allow time for change to take effect + local max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_recovery_max_active) + test "$max_recoveries" = "$recoveries" || return 1 + + # Change recovery limit after setting osd_mclock_override_recovery_settings. + # Verify that the recovery limit is modified. 
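# For reference (an illustrative note, not part of the original test): with
# --format=json the admin socket "config get" returns a one-entry JSON
# object, which is why the verification below compares against a literal
# JSON string, e.g.:
#
#   CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
#       config get osd_recovery_max_active
#   # expected once the override takes effect:
#   # {"osd_recovery_max_active":"10"}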
+ ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1 + ceph config set osd.0 osd_recovery_max_active 10 || return 1 + sleep 2 # Allow time for change to take effect + max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_recovery_max_active) + test "$max_recoveries" = '{"osd_recovery_max_active":"10"}' || return 1 + + teardown $dir || return 1 +} + +function TEST_backfill_limit_adjustment_mclock() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + # Get default value + echo "osd_max_backfills: $backfills" || return 1 + + # Change the backfill limit without setting + # osd_mclock_override_recovery_settings option. Verify that the backfill + # limit is retained at its default value. + ceph config set osd.0 osd_max_backfills 20 || return 1 + sleep 2 # Allow time for change to take effect + local max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = $backfills || return 1 + + # Verify local and async reserver settings are not changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = $backfills || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = $backfills || return 1 + + # Change backfills limit after setting osd_mclock_override_recovery_settings. + # Verify that the backfills limit is modified. + ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1 + ceph config set osd.0 osd_max_backfills 20 || return 1 + sleep 2 # Allow time for change to take effect + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = 20 || return 1 + + # Verify local and async reserver settings are changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + + # Kill osd and bring it back up. + # Confirm that the backfill settings are retained. 
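# Illustrative sketch (not part of the original test): the reserver limits
# verified above, and again after the restart below, come from the OSD
# admin socket; a hypothetical helper wrapping that query could look like:
#
#   reserver_limit() {        # hypothetical name, for illustration only
#       local which=$1        # local_reservations or remote_reservations
#       CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
#           dump_recovery_reservations | jq .${which}.max_allowed | bc
#   }
#   test $(reserver_limit local_reservations) = 20 || return 1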
+ kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + wait_for_osd down 0 || return 1 + activate_osd $dir 0 --osd-op-queue=mclock_scheduler || return 1 + + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = 20 || return 1 + + # Verify local and async reserver settings are changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + + teardown $dir || return 1 +} + +function TEST_profile_disallow_builtin_params_modify() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify that the default mclock profile is set on the OSD + local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$def_mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + cur_mclock_profile=$(eval echo $cur_mclock_profile) + test $cur_mclock_profile = "high_recovery_ops" || return 1 + + declare -a options=("osd_mclock_scheduler_background_recovery_res" + "osd_mclock_scheduler_client_res") + + local retries=10 + local errors=0 + for opt in "${options[@]}" + do + # Try and change a mclock config param and confirm that no change occurred + local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get $opt | jq .$opt | bc) + local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l) + ceph config set osd.0 $opt $opt_val_new || return 1 + + # Check configuration values + for count in $(seq 0 $(expr $retries - 1)) + do + errors=0 + sleep 2 # Allow time for changes to take effect + + echo "Check configuration values - Attempt#: $count" + # Check configuration value on Mon store (or the default) for the osd + local res=$(ceph config get osd.0 $opt) || return 1 + echo "Mon db (or default): osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check running configuration value using "config show" cmd + res=$(ceph config show osd.0 | grep $opt |\ + awk '{ print $2 }' | bc ) || return 1 + echo "Running config: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check value in the in-memory 'values' map is unmodified + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get $opt | jq .$opt | bc) + echo "Values map: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check if we succeeded or exhausted retry count + if [ $errors -eq 0 ] + then + break + elif [ $count -eq $(expr $retries - 1) ] + then + return 1 + fi + done + done + + teardown $dir || return 1 +} + +function TEST_profile_disallow_builtin_params_override() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + 
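# Illustrative note (not part of the original test): this test mirrors
# TEST_profile_disallow_builtin_params_modify above, reusing the same
# retry/verification loop of the form
#
#   for count in $(seq 0 $(expr $retries - 1)); do
#       sleep 2   # allow time for changes to take effect
#       ...       # re-check mon db, "config show" and the in-memory map
#   done
#
# the only difference being that the attempted change is made with
# "ceph tell osd.0 config set" (an override) instead of
# "ceph config set osd.0".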
run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify that the default mclock profile is set on the OSD + local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$def_mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + cur_mclock_profile=$(eval echo $cur_mclock_profile) + test $cur_mclock_profile = "high_recovery_ops" || return 1 + + declare -a options=("osd_mclock_scheduler_background_recovery_res" + "osd_mclock_scheduler_client_res") + + local retries=10 + local errors=0 + for opt in "${options[@]}" + do + # Override a mclock config param and confirm that no change occurred + local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get $opt | jq .$opt | bc) + local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l) + ceph tell osd.0 config set $opt $opt_val_new || return 1 + + # Check configuration values + for count in $(seq 0 $(expr $retries - 1)) + do + errors=0 + sleep 2 # Allow time for changes to take effect + + echo "Check configuration values - Attempt#: $count" + # Check configuration value on Mon store (or the default) for the osd + local res=$(ceph config get osd.0 $opt) || return 1 + echo "Mon db (or default): osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check running configuration value using "config show" cmd + res=$(ceph config show osd.0 | grep $opt |\ + awk '{ print $2 }' | bc ) || return 1 + echo "Running config: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check value in the in-memory 'values' map is unmodified + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get $opt | jq .$opt | bc) + echo "Values map: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check if we succeeded or exhausted retry count + if [ $errors -eq 0 ] + then + break + elif [ $count -eq $(expr $retries - 1) ] + then + return 1 + fi + done + done + + teardown $dir || return 1 +} + +main mclock-config "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh mclock-config.sh" +# End: diff --git a/qa/standalone/misc/network-ping.sh b/qa/standalone/misc/network-ping.sh new file mode 100755 index 000000000..4745108c5 --- /dev/null +++ b/qa/standalone/misc/network-ping.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug_disable_randomized_ping=true " + CEPH_ARGS+="--debug_heartbeat_testing_span=5 " + CEPH_ARGS+="--osd_heartbeat_interval=1 " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_network_ping_test1() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || 
return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get 1 cycle worth of ping data "1 minute" + sleep 10 + flush_pg_stats + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + # Wait another 4 cycles to get "5 minute interval" + sleep 20 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + + # Wait another 10 cycles to get "15 minute interval" + sleep 50 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + # Just check the threshold output matches the input + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 99 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "99" || return 1 + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 98 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "98" || return 1 + + rm -f $dir/json +} + +# Test setting of mon_warn_on_slow_ping_time very low to +# get health warning +function TEST_network_ping_test2() { + local dir=$1 + + 
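# Illustrative note (not part of the original test): the raw ping data this
# warning is derived from can be dumped directly, as TEST_network_ping_test1
# does above, e.g.:
#
#   CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
#   test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
#
# Here the health warning is provoked instead by lowering the threshold via
# --mon_warn_on_slow_ping_time (see EXTRA_OPTS below) so that ordinary ping
# times exceed it.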
export CEPH_ARGS + export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + ceph osd crush add-bucket dc1 datacenter + ceph osd crush add-bucket dc2 datacenter + ceph osd crush add-bucket dc3 datacenter + ceph osd crush add-bucket rack1 rack + ceph osd crush add-bucket rack2 rack + ceph osd crush add-bucket rack3 rack + ceph osd crush add-bucket host1 host + ceph osd crush add-bucket host2 host + ceph osd crush add-bucket host3 host + ceph osd crush move dc1 root=default + ceph osd crush move dc2 root=default + ceph osd crush move dc3 root=default + ceph osd crush move rack1 datacenter=dc1 + ceph osd crush move rack2 datacenter=dc2 + ceph osd crush move rack3 datacenter=dc3 + ceph osd crush move host1 rack=rack1 + ceph osd crush move host2 rack=rack2 + ceph osd crush move host3 rack=rack3 + ceph osd crush set osd.0 1.0 host=host1 + ceph osd crush set osd.1 1.0 host=host2 + ceph osd crush set osd.2 1.0 host=host3 + ceph osd crush rule create-simple myrule default host firstn + + create_pool foo 16 16 replicated myrule + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get at least 1 cycle of ping data (this test runs with 5 second cycles of 1 second pings) + sleep 10 + flush_pg_stats + + ceph health | tee $dir/health + grep -q "Slow OSD heartbeats" $dir/health || return 1 + + ceph health detail | tee $dir/health + grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1 + grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1 + grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \ +to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" $dir/health || return 1 + rm -f $dir/health +} + +main network-ping "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh network-ping.sh" +# End: diff --git a/qa/standalone/misc/ok-to-stop.sh b/qa/standalone/misc/ok-to-stop.sh new file mode 100755 index 000000000..dc9e7422f --- /dev/null +++ b/qa/standalone/misc/ok-to-stop.sh @@ -0,0 +1,296 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7150" # git grep '\<7150\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7151" # git grep '\<7151\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7152" # git grep '\<7152\>' : there must be only one + export CEPH_MON_D="127.0.0.1:7153" # git grep '\<7153\>' : there must be only one + export CEPH_MON_E="127.0.0.1:7154" # git grep '\<7154\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + export ORIG_CEPH_ARGS="$CEPH_ARGS" + + local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + kill_daemons $dir KILL || return 1 + teardown $dir || return 1 + done +} + +function TEST_1_mon_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + + ceph mon ok-to-stop dne || return 1 + ! ceph mon ok-to-stop a || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ! 
ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm dne || return 1 +} + +function TEST_2_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + + ceph mon ok-to-stop dne || return 1 + ! ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop a b || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm dne || return 1 +} + +function TEST_3_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + wait_for_quorum 60 3 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ! ceph mon ok-to-stop a b || return 1 + ! ceph mon ok-to-stop b c || return 1 + ! ceph mon ok-to-stop a b c || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + + kill_daemons $dir KILL mon.b + wait_for_quorum 60 2 + + ! ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ! ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ! ceph mon ok-to-rm c || return 1 +} + +function TEST_4_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + run_mon $dir d --public-addr=$CEPH_MON_D || return 1 + wait_for_quorum 60 4 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ! ceph mon ok-to-stop a b || return 1 + ! ceph mon ok-to-stop c d || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + + kill_daemons $dir KILL mon.a + wait_for_quorum 60 3 + + ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + ! 
ceph mon ok-to-stop d || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 +} + +function TEST_5_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + run_mon $dir d --public-addr=$CEPH_MON_D || return 1 + run_mon $dir e --public-addr=$CEPH_MON_E || return 1 + wait_for_quorum 60 5 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + ceph mon ok-to-stop a b || return 1 + ceph mon ok-to-stop c d || return 1 + ! ceph mon ok-to-stop a b c || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 + + kill_daemons $dir KILL mon.a + wait_for_quorum 60 4 + + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 + + kill_daemons $dir KILL mon.e + wait_for_quorum 60 3 + + ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + ! ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ! ceph mon ok-to-rm b || return 1 + ! ceph mon ok-to-rm c || return 1 + ! ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 +} + +function TEST_0_mds() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_mds $dir a || return 1 + + ceph osd pool create meta 1 || return 1 + ceph osd pool create data 1 || return 1 + ceph fs new myfs meta data || return 1 + sleep 5 + + ! ceph mds ok-to-stop a || return 1 + ! ceph mds ok-to-stop a dne || return 1 + ceph mds ok-to-stop dne || return 1 + + run_mds $dir b || return 1 + sleep 5 + + ceph mds ok-to-stop a || return 1 + ceph mds ok-to-stop b || return 1 + ! ceph mds ok-to-stop a b || return 1 + ceph mds ok-to-stop a dne1 dne2 || return 1 + ceph mds ok-to-stop b dne || return 1 + ! 
ceph mds ok-to-stop a b dne || return 1 + ceph mds ok-to-stop dne1 dne2 || return 1 + + kill_daemons $dir KILL mds.a +} + +function TEST_0_osd() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd || return 1 + ceph osd pool create ec erasure ec-profile || return 1 + + wait_for_clean || return 1 + + # with min_size 3, we can stop only 1 osd + ceph osd pool set ec min_size 3 || return 1 + wait_for_clean || return 1 + + ceph osd ok-to-stop 0 || return 1 + ceph osd ok-to-stop 1 || return 1 + ceph osd ok-to-stop 2 || return 1 + ceph osd ok-to-stop 3 || return 1 + ! ceph osd ok-to-stop 0 1 || return 1 + ! ceph osd ok-to-stop 2 3 || return 1 + ceph osd ok-to-stop 0 --max 2 | grep '[0]' || return 1 + ceph osd ok-to-stop 1 --max 2 | grep '[1]' || return 1 + + # with min_size 2 we can stop 1 osds + ceph osd pool set ec min_size 2 || return 1 + wait_for_clean || return 1 + + ceph osd ok-to-stop 0 1 || return 1 + ceph osd ok-to-stop 2 3 || return 1 + ! ceph osd ok-to-stop 0 1 2 || return 1 + ! ceph osd ok-to-stop 1 2 3 || return 1 + + ceph osd ok-to-stop 0 --max 2 | grep '[0,1]' || return 1 + ceph osd ok-to-stop 0 --max 20 | grep '[0,1]' || return 1 + ceph osd ok-to-stop 2 --max 2 | grep '[2,3]' || return 1 + ceph osd ok-to-stop 2 --max 20 | grep '[2,3]' || return 1 + + # we should get the same result with one of the osds already down + kill_daemons $dir TERM osd.0 || return 1 + ceph osd down 0 || return 1 + wait_for_peered || return 1 + + ceph osd ok-to-stop 0 || return 1 + ceph osd ok-to-stop 0 1 || return 1 + ! ceph osd ok-to-stop 0 1 2 || return 1 + ! ceph osd ok-to-stop 1 2 3 || return 1 +} + + +main ok-to-stop "$@" diff --git a/qa/standalone/misc/rados-striper.sh b/qa/standalone/misc/rados-striper.sh new file mode 100755 index 000000000..be6349b81 --- /dev/null +++ b/qa/standalone/misc/rados-striper.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Sebastien Ponce <sebastien.ponce@cern.ch> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7116" # git grep '\<7116\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + # setup + setup $dir || return 1 + + # create a cluster with one monitor and three osds + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + + # create toyfile + dd if=/dev/urandom of=$dir/toyfile bs=1234 count=1 + + # put a striped object + rados --pool rbd --striper put toyfile $dir/toyfile || return 1 + + # stat it, with and without striping + rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1 + rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1 + echo ' size 1234' > $dir/refstat + diff -w $dir/stripedStat $dir/refstat || return 1 + diff -w $dir/stat $dir/refstat || return 1 + rados --pool rbd stat toyfile >& $dir/staterror + grep -q 'No such file or directory' $dir/staterror || return 1 + + # get the file back with and without striping + rados --pool rbd --striper get toyfile $dir/stripedGroup || return 1 + diff -w $dir/toyfile $dir/stripedGroup || return 1 + rados --pool rbd get toyfile.0000000000000000 $dir/nonSTripedGroup || return 1 + diff -w $dir/toyfile $dir/nonSTripedGroup || return 1 + + # test truncate + rados --pool rbd --striper truncate toyfile 12 + rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1 + rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1 + echo ' size 12' > $dir/reftrunc + diff -w $dir/stripedStat $dir/reftrunc || return 1 + diff -w $dir/stat $dir/reftrunc || return 1 + + # test xattrs + + rados --pool rbd --striper setxattr toyfile somexattr somevalue || return 1 + rados --pool rbd --striper getxattr toyfile somexattr > $dir/xattrvalue || return 1 + rados --pool rbd getxattr toyfile.0000000000000000 somexattr > $dir/xattrvalue2 || return 1 + echo 'somevalue' > $dir/refvalue + diff -w $dir/xattrvalue $dir/refvalue || return 1 + diff -w $dir/xattrvalue2 $dir/refvalue || return 1 + rados --pool rbd --striper listxattr toyfile > $dir/xattrlist || return 1 + echo 'somexattr' > $dir/reflist + diff -w $dir/xattrlist $dir/reflist || return 1 + rados --pool rbd listxattr toyfile.0000000000000000 | grep -v striper > $dir/xattrlist2 || return 1 + diff -w $dir/xattrlist2 $dir/reflist || return 1 + rados --pool rbd --striper rmxattr toyfile somexattr || return 1 + + local attr_not_found_str="No data available" + [ `uname` = FreeBSD ] && \ + attr_not_found_str="Attribute not found" + expect_failure $dir "$attr_not_found_str" \ + rados --pool rbd --striper getxattr toyfile somexattr || return 1 + expect_failure $dir "$attr_not_found_str" \ + rados --pool rbd getxattr toyfile.0000000000000000 somexattr || return 1 + + # test rm + rados --pool rbd --striper rm toyfile || return 1 + expect_failure $dir 'No such file or directory' \ + rados --pool rbd --striper stat toyfile || return 1 + expect_failure $dir 'No such file or directory' \ + rados --pool rbd stat toyfile.0000000000000000 || return 1 + + # cleanup + teardown $dir || return 1 +} + +main rados-striper "$@" diff --git a/qa/standalone/misc/test-ceph-helpers.sh b/qa/standalone/misc/test-ceph-helpers.sh new file mode 100755 index 000000000..e7805858a --- /dev/null +++ 
b/qa/standalone/misc/test-ceph-helpers.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Federico Gimenez <fgimenez@coit.es> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@" diff --git a/qa/standalone/misc/test-snaptrim-stats.sh b/qa/standalone/misc/test-snaptrim-stats.sh new file mode 100755 index 000000000..98b3e4fdd --- /dev/null +++ b/qa/standalone/misc/test-snaptrim-stats.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat <contact@redhat.com> +# +# Author: Sridhar Seshasayee <sseshasa@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-bluestore 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_snaptrim_stats() { + local dir=$1 + local poolname=test + local OSDS=3 + local PGNUM=8 + local PGPNUM=8 + local objects=10 + local WAIT_FOR_UPDATE=10 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1 + done + + # disable scrubs + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool + create_pool $poolname $PGNUM $PGPNUM + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + # write a few objects + TESTDATA="testdata.1" + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # create a snapshot, clones + SNAP=1 + rados -p $poolname mksnap snap${SNAP} + TESTDATA="testdata.2" + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # remove the snapshot, should trigger snaptrim + rados -p $poolname rmsnap snap${SNAP} + + # check for snaptrim stats + wait_for_clean || return 1 + sleep 
$WAIT_FOR_UPDATE + local objects_trimmed=0 + local snaptrim_duration_total=0.0 + for i in $(seq 0 $(expr $PGNUM - 1)) + do + local pgid="${poolid}.${i}" + objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \ + jq '.info.stats.objects_trimmed')) + snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \ + $pgid query | jq '.info.stats.snaptrim_duration') | bc` + done + test $objects_trimmed -eq $objects || return 1 + echo "$snaptrim_duration_total > 0.0" | bc || return 1 + + teardown $dir || return 1 +} + +function TEST_snaptrim_stats_multiple_snaps() { + local dir=$1 + local poolname=test + local OSDS=3 + local PGNUM=8 + local PGPNUM=8 + local objects=10 + local WAIT_FOR_UPDATE=10 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1 + done + + # disable scrubs + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool + create_pool $poolname $PGNUM $PGPNUM + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + # write a few objects + local TESTDATA="testdata.0" + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # create snapshots, clones + NUMSNAPS=2 + for i in `seq 1 $NUMSNAPS` + do + rados -p $poolname mksnap snap${i} + TESTDATA="testdata".${i} + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + done + + # remove the snapshots, should trigger snaptrim + local total_objects_trimmed=0 + for i in `seq 1 $NUMSNAPS` + do + rados -p $poolname rmsnap snap${i} + + # check for snaptrim stats + wait_for_clean || return 1 + sleep $WAIT_FOR_UPDATE + local objects_trimmed=0 + local snaptrim_duration_total=0.0 + for i in $(seq 0 $(expr $PGNUM - 1)) + do + local pgid="${poolid}.${i}" + objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \ + jq '.info.stats.objects_trimmed')) + snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \ + $pgid query | jq '.info.stats.snaptrim_duration') | bc` + done + test $objects_trimmed -eq $objects || return 1 + echo "$snaptrim_duration_total > 0.0" | bc || return 1 + total_objects_trimmed=$(expr $total_objects_trimmed + $objects_trimmed) + done + + test $total_objects_trimmed -eq $((objects * NUMSNAPS)) || return 1 + + teardown $dir || return 1 +} +main test-snaptrim-stats "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh test-snaptrim-stats.sh" +# End: diff --git a/qa/standalone/misc/ver-health.sh b/qa/standalone/misc/ver-health.sh new file mode 100755 index 000000000..e03f8f4f5 --- /dev/null +++ b/qa/standalone/misc/ver-health.sh @@ -0,0 +1,231 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2020 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7165" # git grep '\<7165\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7166" # git grep '\<7166\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--mon_health_to_clog_tick_interval=1.0 " + export ORIG_CEPH_ARGS="$CEPH_ARGS" + + local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function wait_for_health_string() { + local grep_string=$1 + local seconds=${2:-20} + + # Allow mon to notice version difference + set -o pipefail + PASSED="false" + for ((i=0; i < $seconds; i++)); do + if ceph health | grep -q "$grep_string" + then + PASSED="true" + break + fi + sleep 1 + done + set +o pipefail + + # Make sure health changed + if [ $PASSED = "false" ]; + then + return 1 + fi + return 0 +} + + + +# Test a single OSD with an old version and multiple OSDs with 2 different old versions +function TEST_check_version_health_1() { + local dir=$1 + + # Asssume MON_A is leader? + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + # setup + setup $dir || return 1 + + # create a cluster with two monitors and three osds + run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + ceph health detail + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + + kill_daemons $dir KILL osd.1 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 1 + + wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1 + + ceph health detail + # Should notice that osd.1 is a different version + ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1 + + kill_daemons $dir KILL osd.2 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2 + kill_daemons $dir KILL osd.0 + ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0 + + wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + + ceph health detail + ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1 + ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1 +} + +# Test with 1 MON and 1 MDS with an older version, and add 2 OSDs with different versions +function TEST_check_version_health_2() { + local dir=$1 + + # Asssume MON_A is leader? 
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + # setup + setup $dir || return 1 + + # create a cluster with all daemon types + run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_mgr $dir x || return 1 + run_mgr $dir y || return 1 + run_mds $dir m || return 1 + run_mds $dir n || return 1 + + sleep 5 + ceph health detail + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + + kill_daemons $dir KILL mon.b + ceph_debug_version_for_testing=01.00.00-gversion-test run_mon $dir b --mon_warn_older_version_delay=0 + # XXX: Manager doesn't seem to use the test specific config for version + #kill_daemons $dir KILL mgr.x + #ceph_debug_version_for_testing=02.00.00-gversion-test run_mgr $dir x + kill_daemons $dir KILL mds.m + ceph_debug_version_for_testing=01.00.00-gversion-test run_mds $dir m + + wait_for_health_string "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1 + + ceph health detail + # Should notice that mon.b and mds.m is a different version + ceph health detail | grep -q "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1 + ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There are daemons running an older version of ceph" || return 1 + ceph health detail | grep -q "mon.b mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1 + + kill_daemons $dir KILL osd.2 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2 + kill_daemons $dir KILL osd.0 + ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0 + + wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + + ceph health detail + ceph health | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "mon.b osd.2 mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1 + ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1 +} + +# Verify delay handling with same setup as test 1 +function TEST_check_version_health_3() { + local dir=$1 + + # Asssume MON_A is leader? 
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + # setup + setup $dir || return 1 + + # create a cluster with two monitors and three osds + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + + local start_osd_time=$SECONDS + # use memstore for faster bootup + EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 0 || return 1 + EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 1 || return 1 + EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 2 || return 1 + # take the time used for boot osds into consideration + local warn_older_version_delay=$(($SECONDS - $start_osd_time + 20)) + + sleep 5 + ceph health detail + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + ceph tell 'mon.*' injectargs "--mon_warn_older_version_delay $warn_older_version_delay" + kill_daemons $dir KILL osd.1 + EXTRA_OPTS=" --osd-objectstore=memstore" \ + ceph_debug_version_for_testing=01.00.00-gversion-test \ + activate_osd $dir 1 + + # Wait 50% of 20 second delay config + sleep 10 + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + + # Now make sure that at least 20 seconds have passed + wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" 20 || return 1 + + ceph health detail + # Should notice that osd.1 is a different version + ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1 + + kill_daemons $dir KILL osd.2 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2 + kill_daemons $dir KILL osd.0 + ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0 + + wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + + ceph health detail + ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1 + ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1 +} + +main ver-health "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && ../qa/run-standalone.sh ver-health.sh" +# End: diff --git a/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh new file mode 100755 index 000000000..276d26aab --- /dev/null +++ b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one + export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one + export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + export BASE_CEPH_ARGS=$CEPH_ARGS + CEPH_ARGS+="--mon-host=$CEPH_MON_A" + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} +TEST_stretched_cluster_failover_add_three_osds(){ + local dir=$1 + local OSDS=8 + setup $dir || return 1 + + run_mon $dir a --public-addr $CEPH_MON_A || return 1 + wait_for_quorum 300 1 || return 1 + + run_mon $dir b --public-addr $CEPH_MON_B || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 + + run_mon $dir c --public-addr $CEPH_MON_C || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C" + wait_for_quorum 300 3 || return 1 + + run_mon $dir d --public-addr $CEPH_MON_D || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D" + wait_for_quorum 300 4 || return 1 + + run_mon $dir e --public-addr $CEPH_MON_E || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E" + wait_for_quorum 300 5 || return 1 + + ceph mon set election_strategy connectivity + ceph mon add disallowed_leader e + + run_mgr $dir x || return 1 + run_mgr $dir y || return 1 + run_mgr $dir z || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for zone in iris pze + do + ceph osd crush add-bucket $zone zone + ceph osd crush move $zone root=default + done + + + ceph osd crush add-bucket node-2 host + ceph osd crush add-bucket node-3 host + ceph osd crush add-bucket node-4 host + ceph osd crush add-bucket node-5 host + + ceph osd crush move node-2 zone=iris + ceph osd crush move node-3 zone=iris + ceph osd crush move node-4 zone=pze + ceph osd crush move node-5 zone=pze + + ceph osd crush move osd.0 host=node-2 + ceph osd crush move osd.1 host=node-2 + ceph osd crush move osd.2 host=node-3 + ceph osd crush move osd.3 host=node-3 + ceph osd crush move osd.4 host=node-4 + ceph osd crush move osd.5 host=node-4 + ceph osd crush move osd.6 host=node-5 + ceph osd crush move osd.7 host=node-5 + + ceph mon set_location a zone=iris host=node-2 + ceph mon set_location b zone=iris host=node-3 + ceph mon set_location c zone=pze host=node-4 + ceph mon set_location d zone=pze host=node-5 + + hostname=$(hostname -s) + ceph osd crush remove $hostname || return 1 + ceph osd getcrushmap > crushmap || return 1 + crushtool --decompile crushmap > crushmap.txt || return 1 + sed 's/^# end crush map$//' crushmap.txt > 
crushmap_modified.txt || return 1 + cat >> crushmap_modified.txt << EOF +rule stretch_rule { + id 1 + type replicated + min_size 1 + max_size 10 + step take iris + step chooseleaf firstn 2 type host + step emit + step take pze + step chooseleaf firstn 2 type host + step emit +} + +# end crush map +EOF + + crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1 + ceph osd setcrushmap -i crushmap.bin || return 1 + local stretched_poolname=stretched_rbdpool + ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1 + ceph osd pool set $stretched_poolname size 4 || return 1 + + sleep 3 + + ceph mon set_location e zone=arbiter host=node-1 + ceph mon enable_stretch_mode e stretch_rule zone + + kill_daemons $dir KILL mon.c || return 1 + kill_daemons $dir KILL mon.d || return 1 + + kill_daemons $dir KILL osd.4 || return 1 + kill_daemons $dir KILL osd.5 || return 1 + kill_daemons $dir KILL osd.6 || return 1 + kill_daemons $dir KILL osd.7 || return 1 + + ceph -s + + sleep 3 + + run_osd $dir 8 || return 1 + run_osd $dir 9 || return 1 + run_osd $dir 10 || return 1 + + ceph -s + + sleep 3 + + teardown $dir || return 1 +} +main mon-stretch-fail-recovery "$@"
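A minimal sketch, not part of the original test, of how the recovery could be asserted explicitly instead of only printing `ceph -s` between sleeps. It assumes the ceph-helpers wait_for_clean helper, reuses the node-2/node-3 host buckets defined above, and assumes the degraded stretch cluster can still reach active+clean with one zone down:

    # hypothetical follow-up: place the replacement OSDs under the surviving
    # zone's hosts, then wait for the stretched pool instead of sleeping
    ceph osd crush move osd.8 host=node-2 || return 1
    ceph osd crush move osd.9 host=node-3 || return 1
    ceph osd crush move osd.10 host=node-3 || return 1
    wait_for_clean || return 1
    ceph health detail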
\ No newline at end of file diff --git a/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh new file mode 100755 index 000000000..7e13f4076 --- /dev/null +++ b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh @@ -0,0 +1,145 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one + export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one + export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + export BASE_CEPH_ARGS=$CEPH_ARGS + CEPH_ARGS+="--mon-host=$CEPH_MON_A" + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} +TEST_stretched_cluster_uneven_weight() { + local dir=$1 + local OSDS=4 + local weight=0.09000 + setup $dir || return 1 + + run_mon $dir a --public-addr $CEPH_MON_A || return 1 + wait_for_quorum 300 1 || return 1 + + run_mon $dir b --public-addr $CEPH_MON_B || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 + + run_mon $dir c --public-addr $CEPH_MON_C || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C" + wait_for_quorum 300 3 || return 1 + + run_mon $dir d --public-addr $CEPH_MON_D || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D" + wait_for_quorum 300 4 || return 1 + + run_mon $dir e --public-addr $CEPH_MON_E || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E" + wait_for_quorum 300 5 || return 1 + + ceph mon set election_strategy connectivity + ceph mon add disallowed_leader e + + run_mgr $dir x || return 1 + run_mgr $dir y || return 1 + run_mgr $dir z || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for zone in iris pze + do + ceph osd crush add-bucket $zone zone + ceph osd crush move $zone root=default + done + + ceph osd crush add-bucket node-2 host + ceph osd crush add-bucket node-3 host + ceph osd crush add-bucket node-4 host + ceph osd crush add-bucket node-5 host + + ceph osd crush move node-2 zone=iris + ceph osd crush move node-3 zone=iris + ceph osd crush move node-4 zone=pze + ceph osd crush move node-5 zone=pze + + ceph osd crush move osd.0 host=node-2 + ceph osd crush move osd.1 host=node-3 + ceph osd crush move osd.2 host=node-4 + ceph osd crush move osd.3 host=node-5 + + ceph mon set_location a zone=iris host=node-2 + ceph mon set_location b zone=iris host=node-3 + ceph mon set_location c zone=pze host=node-4 + ceph mon set_location d zone=pze host=node-5 + + hostname=$(hostname -s) + ceph osd crush remove $hostname || return 1 + ceph osd getcrushmap > crushmap || return 1 + crushtool --decompile crushmap > crushmap.txt || return 1 + sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1 + cat >> crushmap_modified.txt << EOF +rule stretch_rule { + id 1 + type replicated + min_size 1 + max_size 10 + step take iris + step 
chooseleaf firstn 2 type host + step emit + step take pze + step chooseleaf firstn 2 type host + step emit +} +# end crush map +EOF + + crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1 + ceph osd setcrushmap -i crushmap.bin || return 1 + local stretched_poolname=stretched_rbdpool + ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1 + ceph osd pool set $stretched_poolname size 4 || return 1 + + ceph mon set_location e zone=arbiter host=node-1 || return 1 + ceph mon enable_stretch_mode e stretch_rule zone || return 1 # Enter strech mode + + # reweight to a more round decimal. + ceph osd crush reweight osd.0 $weight + ceph osd crush reweight osd.1 $weight + ceph osd crush reweight osd.2 $weight + ceph osd crush reweight osd.3 $weight + + # Firstly, we test for stretch mode buckets != 2 + ceph osd crush add-bucket sham zone || return 1 + ceph osd crush move sham root=default || return 1 + wait_for_health "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1 + + ceph osd crush rm sham # clear the health warn + wait_for_health_gone "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1 + + # Next, we test for uneven weights across buckets + + ceph osd crush reweight osd.0 0.07000 + + wait_for_health "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1 + + ceph osd crush reweight osd.0 $weight # clear the health warn + + wait_for_health_gone "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1 + + teardown $dir || return 1 +} +main mon-stretched-cluster-uneven-weight "$@"
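Both stretch-mode tests reweight OSDs with `ceph osd crush reweight` and then poll for the INCORRECT_NUM_BUCKETS_STRETCH_MODE and UNEVEN_WEIGHTS_STRETCH_MODE warnings via wait_for_health / wait_for_health_gone. A minimal sketch, not part of the original test, of how the reweight itself could be read back before waiting on health; it assumes the JSON emitted by `ceph osd crush tree` exposes a per-node crush_weight field:

    # read back osd.0's CRUSH weight after the reweight (field name assumed,
    # verify against the output of your Ceph release)
    actual=$(ceph osd crush tree --format=json | \
        jq '.nodes[] | select(.name == "osd.0") | .crush_weight')
    echo "osd.0 crush_weight after reweight: $actual"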
\ No newline at end of file diff --git a/qa/standalone/mon/health-mute.sh b/qa/standalone/mon/health-mute.sh new file mode 100755 index 000000000..d8e07ca06 --- /dev/null +++ b/qa/standalone/mon/health-mute.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7143" # git grep '\<714\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-pg-warn-min-per-osd 0 --mon-max-pg-per-osd 1000 " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mute() { + local dir=$1 + setup $dir || return 1 + + set -o pipefail + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 8 + ceph osd pool application enable foo rbd --yes-i-really-mean-it + wait_for_clean || return 1 + + ceph -s + ceph health | grep HEALTH_OK || return 1 + # test warning on setting pool size=1 + ceph osd pool set foo size 1 --yes-i-really-mean-it + ceph -s + ceph health | grep HEALTH_WARN || return 1 + ceph health detail | grep POOL_NO_REDUNDANCY || return 1 + ceph health mute POOL_NO_REDUNDANCY + ceph -s + ceph health | grep HEALTH_OK | grep POOL_NO_REDUNDANCY || return 1 + ceph health unmute POOL_NO_REDUNDANCY + ceph -s + ceph health | grep HEALTH_WARN || return 1 + # restore pool size to default + ceph osd pool set foo size 3 + ceph -s + ceph health | grep HEALTH_OK || return 1 + ceph osd set noup + ceph -s + ceph health detail | grep OSDMAP_FLAGS || return 1 + ceph osd down 0 + ceph -s + ceph health detail | grep OSD_DOWN || return 1 + ceph health detail | grep HEALTH_WARN || return 1 + + ceph health mute OSD_DOWN + ceph health mute OSDMAP_FLAGS + ceph -s + ceph health | grep HEALTH_OK | grep OSD_DOWN | grep OSDMAP_FLAGS || return 1 + ceph health unmute OSD_DOWN + ceph -s + ceph health | grep HEALTH_WARN || return 1 + + # ttl + ceph health mute OSD_DOWN 10s + ceph -s + ceph health | grep HEALTH_OK || return 1 + sleep 15 + ceph -s + ceph health | grep HEALTH_WARN || return 1 + + # sticky + ceph health mute OSDMAP_FLAGS --sticky + ceph osd unset noup + sleep 5 + ceph -s + ceph health | grep OSDMAP_FLAGS || return 1 + ceph osd set noup + ceph -s + ceph health | grep HEALTH_OK || return 1 + + # rachet down on OSD_DOWN count + ceph osd down 0 1 + ceph -s + ceph health detail | grep OSD_DOWN || return 1 + + ceph health mute OSD_DOWN + kill_daemons $dir TERM osd.0 + ceph osd unset noup + sleep 10 + ceph -s + ceph health detail | grep OSD_DOWN || return 1 + ceph health detail | grep '1 osds down' || return 1 + ceph health | grep HEALTH_OK || return 1 + + sleep 10 # give time for mon tick to rachet the mute + ceph osd set noup + ceph health mute OSDMAP_FLAGS + ceph -s + ceph health detail + ceph health | grep HEALTH_OK || return 1 + + ceph osd down 1 + ceph -s + ceph health detail + ceph health detail | grep '2 osds down' || return 1 + + sleep 10 # give time for mute to clear + ceph -s + ceph health detail + ceph health | grep HEALTH_WARN || return 1 + ceph health detail | grep '2 osds down' || return 1 + + teardown $dir || return 1 +} + +main health-mute "$@" diff --git a/qa/standalone/mon/misc.sh b/qa/standalone/mon/misc.sh new file mode 100755 index 000000000..c7fc6d441 --- 
/dev/null +++ b/qa/standalone/mon/misc.sh @@ -0,0 +1,284 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +TEST_POOL=rbd + +function TEST_osd_pool_get_set() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + create_pool $TEST_POOL 8 + + local flag + for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do + ceph osd pool set $TEST_POOL $flag 0 || return 1 + ! ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag 1 || return 1 + ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag false || return 1 + ! ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag false || return 1 + # check that setting false twice does not toggle to true (bug) + ! ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag true || return 1 + ceph osd dump | grep 'pool ' | grep $flag || return 1 + # cleanup + ceph osd pool set $TEST_POOL $flag 0 || return 1 + done + + local size=$(ceph osd pool get $TEST_POOL size|awk '{print $2}') + local min_size=$(ceph osd pool get $TEST_POOL min_size|awk '{print $2}') + local expected_min_size=$(expr $size - $size / 2) + if [ $min_size -ne $expected_min_size ]; then + echo "default min_size is wrong: expected $expected_min_size, got $min_size" + return 1 + fi + + ceph osd pool set $TEST_POOL scrub_min_interval 123456 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_min_interval 123456' || return 1 + ceph osd pool set $TEST_POOL scrub_min_interval 0 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_min_interval' && return 1 + ceph osd pool set $TEST_POOL scrub_max_interval 123456 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_max_interval 123456' || return 1 + ceph osd pool set $TEST_POOL scrub_max_interval 0 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_max_interval' && return 1 + ceph osd pool set $TEST_POOL deep_scrub_interval 123456 || return 1 + ceph osd dump | grep 'pool ' | grep 'deep_scrub_interval 123456' || return 1 + ceph osd pool set $TEST_POOL deep_scrub_interval 0 || return 1 + ceph osd dump | grep 'pool ' | grep 'deep_scrub_interval' && return 1 + + #replicated pool size restrict in 1 and 10 + ! ceph osd pool set $TEST_POOL 11 || return 1 + #replicated pool min_size must be between in 1 and size + ! ceph osd pool set $TEST_POOL min_size $(expr $size + 1) || return 1 + ! 
ceph osd pool set $TEST_POOL min_size 0 || return 1 + + local ecpool=erasepool + create_pool $ecpool 12 12 erasure default || return 1 + #erasue pool size=k+m, min_size=k + local size=$(ceph osd pool get $ecpool size|awk '{print $2}') + local min_size=$(ceph osd pool get $ecpool min_size|awk '{print $2}') + local k=$(expr $min_size - 1) # default min_size=k+1 + #erasure pool size can't change + ! ceph osd pool set $ecpool size $(expr $size + 1) || return 1 + #erasure pool min_size must be between in k and size + ceph osd pool set $ecpool min_size $(expr $k + 1) || return 1 + ! ceph osd pool set $ecpool min_size $(expr $k - 1) || return 1 + ! ceph osd pool set $ecpool min_size $(expr $size + 1) || return 1 + + teardown $dir || return 1 +} + +function TEST_mon_add_to_single_mon() { + local dir=$1 + + fsid=$(uuidgen) + MONA=127.0.0.1:7117 # git grep '\<7117\>' : there must be only one + MONB=127.0.0.1:7118 # git grep '\<7118\>' : there must be only one + CEPH_ARGS_orig=$CEPH_ARGS + CEPH_ARGS="--fsid=$fsid --auth-supported=none " + CEPH_ARGS+="--mon-initial-members=a " + CEPH_ARGS+="--mon-host=$MONA " + + setup $dir || return 1 + run_mon $dir a --public-addr $MONA || return 1 + # wait for the quorum + timeout 120 ceph -s > /dev/null || return 1 + run_mon $dir b --public-addr $MONB || return 1 + teardown $dir || return 1 + + setup $dir || return 1 + run_mon $dir a --public-addr $MONA || return 1 + # without the fix of #5454, mon.a will assert failure at seeing the MMonJoin + # from mon.b + run_mon $dir b --public-addr $MONB || return 1 + # make sure mon.b get's it's join request in first, then + sleep 2 + # wait for the quorum + timeout 120 ceph -s > /dev/null || return 1 + ceph mon dump + ceph mon dump -f json-pretty + local num_mons + num_mons=$(ceph mon dump --format=json 2>/dev/null | jq ".mons | length") || return 1 + [ $num_mons == 2 ] || return 1 + # no reason to take more than 120 secs to get this submitted + timeout 120 ceph mon add b $MONB || return 1 + teardown $dir || return 1 +} + +function TEST_no_segfault_for_bad_keyring() { + local dir=$1 + setup $dir || return 1 + # create a client.admin key and add it to ceph.mon.keyring + ceph-authtool --create-keyring $dir/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *' + ceph-authtool --create-keyring $dir/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *' + ceph-authtool $dir/ceph.mon.keyring --import-keyring $dir/ceph.client.admin.keyring + CEPH_ARGS_TMP="--fsid=$(uuidgen) --mon-host=127.0.0.1:7102 --auth-supported=cephx " + CEPH_ARGS_orig=$CEPH_ARGS + CEPH_ARGS="$CEPH_ARGS_TMP --keyring=$dir/ceph.mon.keyring " + run_mon $dir a + # create a bad keyring and make sure no segfault occurs when using the bad keyring + echo -e "[client.admin]\nkey = BQAUlgtWoFePIxAAQ9YLzJSVgJX5V1lh5gyctg==" > $dir/bad.keyring + CEPH_ARGS="$CEPH_ARGS_TMP --keyring=$dir/bad.keyring" + ceph osd dump 2> /dev/null + # 139(11|128) means segfault and core dumped + [ $? 
-eq 139 ] && return 1 + CEPH_ARGS=$CEPH_ARGS_orig + teardown $dir || return 1 +} + +function TEST_mon_features() { + local dir=$1 + setup $dir || return 1 + + fsid=$(uuidgen) + MONA=127.0.0.1:7127 # git grep '\<7127\>' ; there must be only one + MONB=127.0.0.1:7128 # git grep '\<7128\>' ; there must be only one + MONC=127.0.0.1:7129 # git grep '\<7129\>' ; there must be only one + CEPH_ARGS_orig=$CEPH_ARGS + CEPH_ARGS="--fsid=$fsid --auth-supported=none " + CEPH_ARGS+="--mon-host=$MONA,$MONB,$MONC " + CEPH_ARGS+="--mon-debug-no-initial-persistent-features " + CEPH_ARGS+="--mon-debug-no-require-reef " + + run_mon $dir a --public-addr $MONA || return 1 + run_mon $dir b --public-addr $MONB || return 1 + timeout 120 ceph -s > /dev/null || return 1 + + # expect monmap to contain 3 monitors (a, b, and c) + jqinput="$(ceph quorum_status --format=json 2>/dev/null)" + jq_success "$jqinput" '.monmap.mons | length == 3' || return 1 + # quorum contains two monitors + jq_success "$jqinput" '.quorum | length == 2' || return 1 + # quorum's monitor features contain kraken, luminous, mimic, nautilus, + # octopus, pacific, quincy + jqfilter='.features.quorum_mon[]|select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + # monmap must have no persistent features set, because we + # don't currently have a quorum made out of all the monitors + # in the monmap. + jqfilter='.monmap.features.persistent | length == 0' + jq_success "$jqinput" "$jqfilter" || return 1 + + # nor do we have any optional features, for that matter. + jqfilter='.monmap.features.optional | length == 0' + jq_success "$jqinput" "$jqfilter" || return 1 + + # validate 'mon feature ls' + + jqinput="$(ceph mon feature ls --format=json 2>/dev/null)" + # k l m n o p q are supported + jqfilter='.all.supported[] | select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.all.supported[] | select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.all.supported[] | select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.all.supported[] | select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.all.supported[] | select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.all.supported[] | select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.all.supported[] | select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.all.supported[] | select(. 
== "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + # start third monitor + run_mon $dir c --public-addr $MONC || return 1 + + wait_for_quorum 300 3 || return 1 + + timeout 300 ceph -s > /dev/null || return 1 + + jqinput="$(ceph quorum_status --format=json 2>/dev/null)" + # expect quorum to have all three monitors + jqfilter='.quorum | length == 3' + jq_success "$jqinput" "$jqfilter" || return 1 + + # quorum's monitor features should have p now too + jqfilter='.features.quorum_mon[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + + # persistent too + jqfilter='.monmap.features.persistent[]|select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "osdmap-prune")' + jq_success "$jqinput" "$jqfilter" "osdmap-prune" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "elector-pinging")' + jq_success "$jqinput" "$jqfilter" "elector-pinging" || return 1 + jqfilter='.monmap.features.persistent | length == 10' + jq_success "$jqinput" "$jqfilter" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + CEPH_ARGS=$CEPH_ARGS_orig + # that's all folks. thank you for tuning in. + teardown $dir || return 1 +} + +main misc "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/misc.sh" +# End: diff --git a/qa/standalone/mon/mkfs.sh b/qa/standalone/mon/mkfs.sh new file mode 100755 index 000000000..6650bdb49 --- /dev/null +++ b/qa/standalone/mon/mkfs.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +set -xe +PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' + + +DIR=mkfs +export CEPH_CONF=/dev/null +unset CEPH_ARGS +MON_ID=a +MON_DIR=$DIR/$MON_ID +CEPH_MON=127.0.0.1:7110 # git grep '\<7110\>' : there must be only one +TIMEOUT=360 + +EXTRAOPTS="" + +function setup() { + teardown + mkdir $DIR +} + +function teardown() { + kill_daemons + rm -fr $DIR +} + +function mon_mkfs() { + local fsid=$(uuidgen) + + ceph-mon \ + --id $MON_ID \ + --fsid $fsid \ + $EXTRAOPTS \ + --mkfs \ + --mon-data=$MON_DIR \ + --mon-initial-members=$MON_ID \ + --mon-host=$CEPH_MON \ + "$@" +} + +function mon_run() { + ceph-mon \ + --id $MON_ID \ + --chdir= \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mon-data=$MON_DIR \ + --log-file=$MON_DIR/log \ + --mon-cluster-log-file=$MON_DIR/log \ + --run-dir=$MON_DIR \ + --pid-file=$MON_DIR/pidfile \ + --public-addr $CEPH_MON \ + "$@" +} + +function kill_daemons() { + for pidfile in $(find $DIR -name pidfile) ; do + pid=$(cat $pidfile) + for try in 0 1 1 1 2 3 ; do + kill $pid || break + sleep $try + done + done +} + +function auth_none() { + mon_mkfs --auth-supported=none + + ceph-mon \ + --id $MON_ID \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mon-data=$MON_DIR \ + --extract-monmap $MON_DIR/monmap + + [ -f $MON_DIR/monmap ] || return 1 + + [ ! -f $MON_DIR/keyring ] || return 1 + + mon_run --auth-supported=none + + timeout $TIMEOUT ceph --mon-host $CEPH_MON mon stat || return 1 +} + +function auth_cephx_keyring() { + cat > $DIR/keyring <<EOF +[mon.] + key = AQDUS79S0AF9FRAA2cgRLFscVce0gROn/s9WMg== + caps mon = "allow *" +EOF + + mon_mkfs --keyring=$DIR/keyring + + [ -f $MON_DIR/keyring ] || return 1 + + mon_run + + timeout $TIMEOUT ceph \ + --name mon. \ + --keyring $MON_DIR/keyring \ + --mon-host $CEPH_MON mon stat || return 1 +} + +function auth_cephx_key() { + if [ -f /etc/ceph/keyring ] ; then + echo "Please move /etc/ceph/keyring away for testing!" + return 1 + fi + + local key=$(ceph-authtool --gen-print-key) + + if mon_mkfs --key='corrupted key' ; then + return 1 + else + rm -fr $MON_DIR/store.db + rm -fr $MON_DIR/kv_backend + fi + + mon_mkfs --key=$key + + [ -f $MON_DIR/keyring ] || return 1 + grep $key $MON_DIR/keyring + + mon_run + + timeout $TIMEOUT ceph \ + --name mon. \ + --keyring $MON_DIR/keyring \ + --mon-host $CEPH_MON mon stat || return 1 +} + +function makedir() { + local toodeep=$MON_DIR/toodeep + + # fail if recursive directory creation is needed + ceph-mon \ + --id $MON_ID \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mkfs \ + --mon-data=$toodeep 2>&1 | tee $DIR/makedir.log + grep 'toodeep.*No such file' $DIR/makedir.log > /dev/null + rm $DIR/makedir.log + + # an empty directory does not mean the mon exists + mkdir $MON_DIR + mon_mkfs --auth-supported=none 2>&1 | tee $DIR/makedir.log + ! grep "$MON_DIR already exists" $DIR/makedir.log || return 1 +} + +function idempotent() { + mon_mkfs --auth-supported=none + mon_mkfs --auth-supported=none 2>&1 | tee $DIR/makedir.log + grep "'$MON_DIR' already exists" $DIR/makedir.log > /dev/null || return 1 +} + +function run() { + local actions + actions+="makedir " + actions+="idempotent " + actions+="auth_cephx_key " + actions+="auth_cephx_keyring " + actions+="auth_none " + for action in $actions ; do + setup + $action || return 1 + teardown + done +} + +run + +# Local Variables: +# compile-command: "cd ../.. 
; make TESTS=test/mon/mkfs.sh check" +# End: diff --git a/qa/standalone/mon/mon-bind.sh b/qa/standalone/mon/mon-bind.sh new file mode 100755 index 000000000..41982b916 --- /dev/null +++ b/qa/standalone/mon/mon-bind.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Quantum Corp. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +SOCAT_PIDS=() + +function port_forward() { + local source_port=$1 + local target_port=$2 + + socat TCP-LISTEN:${source_port},fork,reuseaddr TCP:localhost:${target_port} & + SOCAT_PIDS+=( $! ) +} + +function cleanup() { + for p in "${SOCAT_PIDS[@]}"; do + kill $p + done + SOCAT_PIDS=() +} + +trap cleanup SIGTERM SIGKILL SIGQUIT SIGINT + +function run() { + local dir=$1 + shift + + export MON_IP=127.0.0.1 + export MONA_PUBLIC=7132 # git grep '\<7132\>' ; there must be only one + export MONB_PUBLIC=7133 # git grep '\<7133\>' ; there must be only one + export MONC_PUBLIC=7134 # git grep '\<7134\>' ; there must be only one + export MONA_BIND=7135 # git grep '\<7135\>' ; there must be only one + export MONB_BIND=7136 # git grep '\<7136\>' ; there must be only one + export MONC_BIND=7137 # git grep '\<7137\>' ; there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir && cleanup || { cleanup; return 1; } + teardown $dir + done +} + +function TEST_mon_client_connect_fails() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC} " + run_mon $dir a --mon-host=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + + # now attempt to ping it that should fail. + timeout 3 ceph ping mon.a || return 0 + return 1 +} + +function TEST_mon_client_connect() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC} " + run_mon $dir a --mon-host=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + + # now forward the public port to the bind port. + port_forward ${MONA_PUBLIC} ${MONA_BIND} + + # attempt to connect. we expect that to work + ceph ping mon.a || return 1 +} + +function TEST_mon_quorum() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} " + run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1 + run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1 + + # now forward the public port to the bind port. 
+ port_forward ${MONA_PUBLIC} ${MONA_BIND} + port_forward ${MONB_PUBLIC} ${MONB_BIND} + port_forward ${MONC_PUBLIC} ${MONC_BIND} + + # expect monmap to contain 3 monitors (a, b, and c) + jqinput="$(ceph quorum_status --format=json 2>/dev/null)" + jq_success "$jqinput" '.monmap.mons | length == 3' || return 1 + + # quorum should form + wait_for_quorum 300 3 || return 1 + # expect quorum to have all three monitors + jqfilter='.quorum | length == 3' + jq_success "$jqinput" "$jqfilter" || return 1 +} + +function TEST_put_get() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} " + run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1 + run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1 + + # now forward the public port to the bind port. + port_forward ${MONA_PUBLIC} ${MONA_BIND} + port_forward ${MONB_PUBLIC} ${MONB_BIND} + port_forward ${MONC_PUBLIC} ${MONC_BIND} + + # quorum should form + wait_for_quorum 300 3 || return 1 + + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_pool hello 8 || return 1 + + echo "hello world" > $dir/hello + rados --pool hello put foo $dir/hello || return 1 + rados --pool hello get foo $dir/hello2 || return 1 + diff $dir/hello $dir/hello2 || return 1 +} + +main mon-bind "$@" diff --git a/qa/standalone/mon/mon-created-time.sh b/qa/standalone/mon/mon-created-time.sh new file mode 100755 index 000000000..4b8446059 --- /dev/null +++ b/qa/standalone/mon/mon-created-time.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 SUSE LINUX GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7125" # git grep '\<7125\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_created_time() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph mon dump || return 1 + + if test "$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = ""x ; then + return 1 + fi + + if test "$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = "0.000000"x ; then + return 1 + fi +} + +main mon-created-time "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/mon/mon-created-time.sh" +# End: diff --git a/qa/standalone/mon/mon-handle-forward.sh b/qa/standalone/mon/mon-handle-forward.sh new file mode 100755 index 000000000..01c8f130f --- /dev/null +++ b/qa/standalone/mon/mon-handle-forward.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014,2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + + setup $dir || return 1 + + MONA=127.0.0.1:7300 + MONB=127.0.0.1:7301 + ( + FSID=$(uuidgen) + export CEPH_ARGS + CEPH_ARGS+="--fsid=$FSID --auth-supported=none " + CEPH_ARGS+="--mon-host=$MONA,$MONB " + run_mon $dir a --public-addr $MONA || return 1 + run_mon $dir b --public-addr $MONB || return 1 + ) + + timeout 360 ceph --mon-host-override $MONA mon stat || return 1 + # check that MONB is indeed a peon + ceph --admin-daemon $(get_asok_path mon.b) mon_status | + grep '"peon"' || return 1 + # when the leader ( MONA ) is used, there is no message forwarding + ceph --mon-host-override $MONA osd pool create POOL1 12 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'mon_command(.*"POOL1"' $dir/mon.a.log || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1 + grep 'mon_command(.*"POOL1"' $dir/mon.b.log && return 1 + # when the peon ( MONB ) is used, the message is forwarded to the leader + ceph --mon-host-override $MONB osd pool create POOL2 12 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1 + grep 'forward_request.*mon_command(.*"POOL2"' $dir/mon.b.log || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep ' forward(mon_command(.*"POOL2"' $dir/mon.a.log || return 1 + # forwarded messages must retain features from the original connection + features=$(sed -n -e 's|.*127.0.0.1:0.*accept features \([0-9][0-9]*\)|\1|p' < \ + $dir/mon.b.log) + grep ' forward(mon_command(.*"POOL2".*con_features '$features $dir/mon.a.log || return 1 + + teardown $dir || return 1 +} + +main mon-handle-forward "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 TESTS=test/mon/mon-handle-forward.sh check" +# End: diff --git a/qa/standalone/mon/mon-last-epoch-clean.sh b/qa/standalone/mon/mon-last-epoch-clean.sh new file mode 100755 index 000000000..82243103e --- /dev/null +++ b/qa/standalone/mon/mon-last-epoch-clean.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7302" # git grep '\<7105\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function check_lec_equals_pools() { + + local pool_id=$1 + + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + + if [[ -z "$pool_id" ]]; then + pools=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.floor == $lec) | .poolid")) + + [[ ${#pools[*]} -eq 2 ]] || ( echo $report ; return 1 ) + else + floor=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_id) | .floor")) + + [[ $lec -eq $floor ]] || ( echo $report ; return 1 ) + fi + return 0 +} + +function check_lec_lower_than_pool() { + + local pool_id=$1 + [[ -z "$pool_id" ]] && ( echo "expected pool_id as parameter" ; exit 1 ) + + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + + floor=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_id) | .floor")) + + [[ $lec -lt $floor ]] || ( echo $report ; return 1 ) + return 0 +} + +function check_floor_pool_greater_than_pool() { + + local pool_a=$1 + local pool_b=$1 + [[ -z "$pool_a" ]] && ( echo "expected id as first parameter" ; exit 1 ) + [[ -z "$pool_b" ]] && ( echo "expected id as second parameter" ; exit 1 ) + + report=$(ceph report) + + floor_a=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_a) | .floor")) + + floor_b=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_b) | .floor")) + + [[ $floor_a -gt $floor_b ]] || ( echo $report ; return 1 ) + return 0 +} + +function check_lec_honours_osd() { + + local osd=$1 + + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + + if [[ -z "$osd" ]]; then + osds=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.osd_epochs[] |" \ + " select(.epoch >= $lec) | .id")) + + [[ ${#osds[*]} -eq 3 ]] || ( echo $report ; return 1 ) + else + epoch=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.osd_epochs[] |" \ + " select(.id == $id) | .epoch")) + [[ ${#epoch[*]} -eq 1 ]] || ( echo $report ; return 1 ) + [[ ${epoch[0]} -ge $lec ]] || ( echo $report ; return 1 ) + fi + + return 0 +} + +function validate_fc() { + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + osdm_fc=$(echo $report | \ + jq '.osdmap_first_committed') + + [[ $lec -eq $osdm_fc ]] || ( echo $report ; return 1 ) + return 0 +} + +function get_fc_lc_diff() { + report=$(ceph report) + osdm_fc=$(echo $report | \ + jq '.osdmap_first_committed') + osdm_lc=$(echo $report | \ + jq '.osdmap_last_committed') + + echo $((osdm_lc - osdm_fc)) +} + 
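The helpers above, and get_pool_id below, all shell out to `ceph report` and extract fields with jq. As a reading aid, here is a minimal sketch, not called by any test, that prints the exact fields those checks consume; the JSON paths are copied from the jq filters above, while the function name and output labels are illustrative only:

    # illustrative only: dump the osdmap epoch fields used by the lec checks
    function print_osdmap_epoch_fields() {
        local report
        report=$(ceph report)
        echo "min_last_epoch_clean: $(echo $report | jq '.osdmap_clean_epochs.min_last_epoch_clean')"
        echo "per-pool floors: $(echo $report | jq -c '.osdmap_clean_epochs.last_epoch_clean.per_pool')"
        echo "osd epochs: $(echo $report | jq -c '.osdmap_clean_epochs.osd_epochs')"
        echo "osdmap first/last committed: $(echo $report | jq -c '[.osdmap_first_committed, .osdmap_last_committed]')"
    }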
+function get_pool_id() { + + local pn=$1 + [[ -z "$pn" ]] && ( echo "expected pool name as argument" ; exit 1 ) + + report=$(ceph report) + pool_id=$(echo $report | \ + jq ".osdmap.pools[] | select(.pool_name == \"$pn\") | .pool") + + [[ $pool_id -ge 0 ]] || \ + ( echo "unexpected pool id for pool \'$pn\': $pool_id" ; return -1 ) + + echo $pool_id + return 0 +} + +function wait_for_total_num_maps() { + # rip wait_for_health, becaue it's easier than deduplicating the code + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + local -i v_diff=$1 + + while [[ $(get_fc_lc_diff) -gt $v_diff ]]; do + if (( $loop >= ${#delays[*]} )) ; then + echo "maps were not trimmed" + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done +} + +function TEST_mon_last_clean_epoch() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x --mon-warn-on-pool-no-app=false || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + osd_pid=$(cat $dir/osd.2.pid) + + sleep 5 + + ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || exit 1 + ceph tell 'mon.*' injectargs \ + '--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || exit 1 + + create_pool foo 32 + create_pool bar 32 + + foo_id=$(get_pool_id "foo") + bar_id=$(get_pool_id "bar") + + [[ $foo_id -lt 0 ]] && ( echo "couldn't find pool 'foo' id" ; exit 1 ) + [[ $bar_id -lt 0 ]] && ( echo "couldn't find pool 'bar' id" ; exit 1 ) + + # no real clue why we are getting these warnings, but let's make them go + # away so we can be happy. + + ceph osd set-full-ratio 0.97 + ceph osd set-backfillfull-ratio 0.97 + + wait_for_health_ok || exit 1 + + pre_map_diff=$(get_fc_lc_diff) + wait_for_total_num_maps 2 + post_map_diff=$(get_fc_lc_diff) + + [[ $post_map_diff -le $pre_map_diff ]] || exit 1 + + pre_map_diff=$post_map_diff + + ceph osd pool set foo size 3 + ceph osd pool set bar size 3 + + wait_for_health_ok || exit 1 + + check_lec_equals_pools || exit 1 + check_lec_honours_osd || exit 1 + validate_fc || exit 1 + + # down osd.2; expected result (because all pools' size equals 3): + # - number of committed maps increase over 2 + # - lec equals fc + # - lec equals osd.2's epoch + # - all pools have floor equal to lec + + while kill $osd_pid ; do sleep 1 ; done + ceph osd out 2 + sleep 5 # seriously, just to make sure things settle; we may not need this. + + # generate some maps + for ((i=0; i <= 10; ++i)); do + ceph osd set noup + sleep 1 + ceph osd unset noup + sleep 1 + done + + post_map_diff=$(get_fc_lc_diff) + [[ $post_map_diff -gt 2 ]] || exit 1 + + validate_fc || exit 1 + check_lec_equals_pools || exit 1 + check_lec_honours_osd 2 || exit 1 + + # adjust pool 'bar' size to 2; expect: + # - number of committed maps still over 2 + # - lec equals fc + # - lec equals pool 'foo' floor + # - pool 'bar' floor greater than pool 'foo' + + ceph osd pool set bar size 2 + + diff_ver=$(get_fc_lc_diff) + [[ $diff_ver -gt 2 ]] || exit 1 + + validate_fc || exit 1 + + check_lec_equals_pools $foo_id || exit 1 + check_lec_lower_than_pool $bar_id || exit 1 + + check_floor_pool_greater_than_pool $bar_id $foo_id || exit 1 + + # set pool 'foo' size to 2; expect: + # - health_ok + # - lec equals pools + # - number of committed maps decreases + # - lec equals fc + + pre_map_diff=$(get_fc_lc_diff) + + ceph osd pool set foo size 2 || exit 1 + wait_for_clean || exit 1 + + check_lec_equals_pools || exit 1 + validate_fc || exit 1 + + if ! 
wait_for_total_num_maps 2 ; then + post_map_diff=$(get_fc_lc_diff) + # number of maps is decreasing though, right? + [[ $post_map_diff -lt $pre_map_diff ]] || exit 1 + fi + + # bring back osd.2; expect: + # - health_ok + # - lec equals fc + # - number of committed maps equals 2 + # - all pools have floor equal to lec + + pre_map_diff=$(get_fc_lc_diff) + + activate_osd $dir 2 || exit 1 + wait_for_health_ok || exit 1 + validate_fc || exit 1 + check_lec_equals_pools || exit 1 + + if ! wait_for_total_num_maps 2 ; then + post_map_diff=$(get_fc_lc_diff) + # number of maps is decreasing though, right? + [[ $post_map_diff -lt $pre_map_diff ]] || exit 1 + fi + + return 0 +} + +main mon-last-clean-epoch "$@" diff --git a/qa/standalone/mon/mon-osdmap-prune.sh b/qa/standalone/mon/mon-osdmap-prune.sh new file mode 100755 index 000000000..f8f7876bb --- /dev/null +++ b/qa/standalone/mon/mon-osdmap-prune.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +base_test=$CEPH_ROOT/qa/workunits/mon/test_mon_osdmap_prune.sh + +function run() { + + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7115" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_osdmap_prune() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + # we are getting OSD_OUT_OF_ORDER_FULL health errors, and it's not clear + # why. so, to make the health checks happy, mask those errors. + ceph osd set-full-ratio 0.97 + ceph osd set-backfillfull-ratio 0.97 + + ceph config set osd osd_beacon_report_interval 10 || return 1 + ceph config set mon mon_debug_extra_checks true || return 1 + + ceph config set mon mon_min_osdmap_epochs 100 || return 1 + ceph config set mon mon_osdmap_full_prune_enabled true || return 1 + ceph config set mon mon_osdmap_full_prune_min 200 || return 1 + ceph config set mon mon_osdmap_full_prune_interval 10 || return 1 + ceph config set mon mon_osdmap_full_prune_txsize 100 || return 1 + + + bash -x $base_test || return 1 + + return 0 +} + +main mon-osdmap-prune "$@" + diff --git a/qa/standalone/mon/mon-ping.sh b/qa/standalone/mon/mon-ping.sh new file mode 100755 index 000000000..1f5096be1 --- /dev/null +++ b/qa/standalone/mon/mon-ping.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 SUSE LINUX GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7119" # git grep '\<7119\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_ping() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph ping mon.a || return 1 +} + +main mon-ping "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/mon-ping.sh" +# End: diff --git a/qa/standalone/mon/mon-scrub.sh b/qa/standalone/mon/mon-scrub.sh new file mode 100755 index 000000000..158bd434c --- /dev/null +++ b/qa/standalone/mon/mon-scrub.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7120" # git grep '\<7120\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_scrub() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph mon scrub || return 1 +} + +main mon-scrub "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/mon/mon-scrub.sh" +# End: diff --git a/qa/standalone/mon/mon-seesaw.sh b/qa/standalone/mon/mon-seesaw.sh new file mode 100755 index 000000000..1c97847b9 --- /dev/null +++ b/qa/standalone/mon/mon-seesaw.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + export BASE_CEPH_ARGS=$CEPH_ARGS + CEPH_ARGS+="--mon-host=$CEPH_MON_A " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_seesaw() { + local dir=$1 + + setup $dir || return + + # start with 1 mon + run_mon $dir aa --public-addr $CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + wait_for_quorum 300 1 || return 1 + + # add in a second + run_mon $dir bb --public-addr $CEPH_MON_B || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 + + # remove the first one + ceph mon rm aa || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_B" + sleep 5 + wait_for_quorum 300 1 || return 1 + + # do some stuff that requires the osds be able to communicate with the + # mons. (see http://tracker.ceph.com/issues/17558) + ceph osd pool create foo 8 + rados -p foo bench 1 write + wait_for_clean || return 1 + + # nuke monstore so that it will rejoin (otherwise we get + # "not in monmap and have been in a quorum before; must have been removed" + rm -rf $dir/aa + + # add a back in + # (use a different addr to avoid bind issues) + run_mon $dir aa --public-addr $CEPH_MON_C || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_C,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 +} + +main mon-seesaw "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/mon-ping.sh" +# End: diff --git a/qa/standalone/mon/osd-crush.sh b/qa/standalone/mon/osd-crush.sh new file mode 100755 index 000000000..aa7cac694 --- /dev/null +++ b/qa/standalone/mon/osd-crush.sh @@ -0,0 +1,196 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7104" # git grep '\<7104\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_crush_rule_create_simple() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph --format xml osd crush rule dump replicated_rule | \ + egrep '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \ + grep '<op>choose_firstn</op><num>0</num><type>osd</type>' || return 1 + local rule=rule0 + local root=host1 + ceph osd crush add-bucket $root host + local failure_domain=osd + ceph osd crush rule create-simple $rule $root $failure_domain || return 1 + ceph osd crush rule create-simple $rule $root $failure_domain 2>&1 | \ + grep "$rule already exists" || return 1 + ceph --format xml osd crush rule dump $rule | \ + egrep '<op>take</op><item>[^<]+</item><item_name>'$root'</item_name>' | \ + grep '<op>choose_firstn</op><num>0</num><type>'$failure_domain'</type>' || return 1 + ceph osd crush rule rm $rule || return 1 +} + +function TEST_crush_rule_dump() { + local dir=$1 + + run_mon $dir a || return 1 + + local rule=rule1 + ceph osd crush rule create-erasure $rule || return 1 + test $(ceph --format json osd crush rule dump $rule | \ + jq ".rule_name == \"$rule\"") == true || return 1 + test $(ceph --format json osd crush rule dump | \ + jq "map(select(.rule_name == \"$rule\")) | length == 1") == true || return 1 + ! ceph osd crush rule dump non_existent_rule || return 1 + ceph osd crush rule rm $rule || return 1 +} + +function TEST_crush_rule_rm() { + local rule=erasure2 + + run_mon $dir a || return 1 + + ceph osd crush rule create-erasure $rule default || return 1 + ceph osd crush rule ls | grep $rule || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 +} + +function TEST_crush_rule_create_erasure() { + local dir=$1 + + run_mon $dir a || return 1 + # should have at least one OSD + run_osd $dir 0 || return 1 + + local rule=rule3 + # + # create a new rule with the default profile, implicitly + # + ceph osd crush rule create-erasure $rule || return 1 + ceph osd crush rule create-erasure $rule 2>&1 | \ + grep "$rule already exists" || return 1 + ceph --format xml osd crush rule dump $rule | \ + egrep '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \ + grep '<op>chooseleaf_indep</op><num>0</num><type>host</type>' || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 + # + # create a new rule with the default profile, explicitly + # + ceph osd crush rule create-erasure $rule default || return 1 + ceph osd crush rule ls | grep $rule || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 + # + # create a new rule and the default profile, implicitly + # + ceph osd erasure-code-profile rm default || return 1 + ! 
ceph osd erasure-code-profile ls | grep default || return 1 + ceph osd crush rule create-erasure $rule || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'profile set default' $dir/mon.a.log || return 1 + ceph osd erasure-code-profile ls | grep default || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 +} + +function TEST_add_rule_failed() { + local dir=$1 + + run_mon $dir a || return 1 + + local root=host1 + + ceph osd crush add-bucket $root host + ceph osd crush rule create-simple test_rule1 $root osd firstn || return 1 + ceph osd crush rule create-simple test_rule2 $root osd firstn || return 1 + ceph osd getcrushmap > $dir/crushmap || return 1 + crushtool --decompile $dir/crushmap > $dir/crushmap.txt || return 1 + for i in $(seq 3 255) + do + cat <<EOF +rule test_rule$i { + id $i + type replicated + step take $root + step choose firstn 0 type osd + step emit +} +EOF + done >> $dir/crushmap.txt + crushtool --compile $dir/crushmap.txt -o $dir/crushmap || return 1 + ceph osd setcrushmap -i $dir/crushmap || return 1 + ceph osd crush rule create-simple test_rule_nospace $root osd firstn 2>&1 | grep "Error ENOSPC" || return 1 + +} + +function TEST_crush_rename_bucket() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph osd crush add-bucket host1 host + ceph osd tree + ! ceph osd tree | grep host2 || return 1 + ceph osd crush rename-bucket host1 host2 || return 1 + ceph osd tree + ceph osd tree | grep host2 || return 1 + ceph osd crush rename-bucket host1 host2 || return 1 # idempotency + ceph osd crush rename-bucket nonexistent something 2>&1 | grep "Error ENOENT" || return 1 +} + +function TEST_crush_ls_node() { + local dir=$1 + run_mon $dir a || return 1 + ceph osd crush add-bucket default1 root + ceph osd crush add-bucket host1 host + ceph osd crush move host1 root=default1 + ceph osd crush ls default1 | grep host1 || return 1 + ceph osd crush ls default2 2>&1 | grep "Error ENOENT" || return 1 +} + +function TEST_crush_reject_empty() { + local dir=$1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + # should have at least one OSD + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + + local empty_map=$dir/empty_map + :> $empty_map.txt + crushtool -c $empty_map.txt -o $empty_map.map || return 1 + expect_failure $dir "Error EINVAL" \ + ceph osd setcrushmap -i $empty_map.map || return 1 +} + +main osd-crush "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/mon/osd-crush.sh" +# End: diff --git a/qa/standalone/mon/osd-df.sh b/qa/standalone/mon/osd-df.sh new file mode 100755 index 000000000..962909fdb --- /dev/null +++ b/qa/standalone/mon/osd-df.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_osd_df() { + local dir=$1 + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + # normal case + ceph osd df --f json-pretty | grep osd.0 || return 1 + ceph osd df --f json-pretty | grep osd.1 || return 1 + ceph osd df --f json-pretty | grep osd.2 || return 1 + ceph osd df --f json-pretty | grep osd.3 || return 1 + ceph osd df --f json-pretty | grep osd.4 || return 1 + ceph osd df --f json-pretty | grep osd.5 || return 1 + + # filter by device class + osd_class=$(ceph osd crush get-device-class 0) + ceph osd df class $osd_class --f json-pretty | grep 'osd.0' || return 1 + # post-nautilus we require filter-type no more + ceph osd df $osd_class --f json-pretty | grep 'osd.0' || return 1 + ceph osd crush rm-device-class 0 || return 1 + ceph osd crush set-device-class aaa 0 || return 1 + ceph osd df aaa --f json-pretty | grep 'osd.0' || return 1 + ceph osd df aaa --f json-pretty | grep 'osd.1' && return 1 + # reset osd.1's device class + ceph osd crush rm-device-class 0 || return 1 + ceph osd crush set-device-class $osd_class 0 || return 1 + + # filter by crush node + ceph osd df osd.0 --f json-pretty | grep osd.0 || return 1 + ceph osd df osd.0 --f json-pretty | grep osd.1 && return 1 + ceph osd crush move osd.0 root=default host=foo || return 1 + ceph osd crush move osd.1 root=default host=foo || return 1 + ceph osd crush move osd.2 root=default host=foo || return 1 + ceph osd crush move osd.3 root=default host=bar || return 1 + ceph osd crush move osd.4 root=default host=bar || return 1 + ceph osd crush move osd.5 root=default host=bar || return 1 + ceph osd df tree foo --f json-pretty | grep foo || return 1 + ceph osd df tree foo --f json-pretty | grep bar && return 1 + ceph osd df foo --f json-pretty | grep osd.0 || return 1 + ceph osd df foo --f json-pretty | grep osd.1 || return 1 + ceph osd df foo --f json-pretty | grep osd.2 || return 1 + ceph osd df foo --f json-pretty | grep osd.3 && return 1 + ceph osd df foo --f json-pretty | grep osd.4 && return 1 + ceph osd df foo --f json-pretty | grep osd.5 && return 1 + ceph osd df tree bar --f json-pretty | grep bar || return 1 + ceph osd df tree bar --f json-pretty | grep foo && return 1 + ceph osd df bar --f json-pretty | grep osd.0 && return 1 + ceph osd df bar --f json-pretty | grep osd.1 && return 1 + ceph osd df bar --f json-pretty | grep osd.2 && return 1 + ceph osd df bar --f json-pretty | grep osd.3 || return 1 + ceph osd df bar --f json-pretty | grep osd.4 || return 1 + ceph osd df bar --f json-pretty | grep osd.5 || return 1 + + # filter by pool + ceph osd crush rm-device-class all || return 1 + ceph osd 
crush set-device-class nvme 0 1 3 4 || return 1 + ceph osd crush rule create-replicated nvme-rule default host nvme || return 1 + ceph osd pool create nvme-pool 12 12 nvme-rule || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.0 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.1 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.2 && return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.3 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.4 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.5 && return 1 + + teardown $dir || return 1 +} + +main osd-df "$@" diff --git a/qa/standalone/mon/osd-erasure-code-profile.sh b/qa/standalone/mon/osd-erasure-code-profile.sh new file mode 100755 index 000000000..0afc5fc0b --- /dev/null +++ b/qa/standalone/mon/osd-erasure-code-profile.sh @@ -0,0 +1,240 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7220" # git grep '\<7220\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_set() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local profile=myprofile + # + # no key=value pairs : use the default configuration + # + ceph osd erasure-code-profile set $profile 2>&1 || return 1 + ceph osd erasure-code-profile get $profile | \ + grep plugin=jerasure || return 1 + ceph osd erasure-code-profile rm $profile + # + # key=value pairs override the default + # + ceph osd erasure-code-profile set $profile \ + key=value plugin=isa || return 1 + ceph osd erasure-code-profile get $profile | \ + grep -e key=value -e plugin=isa || return 1 + # + # --force is required to override an existing profile + # + ! ceph osd erasure-code-profile set $profile > $dir/out 2>&1 || return 1 + grep 'will not override' $dir/out || return 1 + ceph osd erasure-code-profile set $profile key=other --force || return 1 + ceph osd erasure-code-profile get $profile | \ + grep key=other || return 1 + + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_ls() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local profile=myprofile + ! 
ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph --format xml osd erasure-code-profile ls | \ + grep "<profile>$profile</profile>" || return 1 + + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_rm() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local profile=myprofile + ceph osd erasure-code-profile set $profile 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile rm $profile || return 1 + ! ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile rm WRONG 2>&1 | \ + grep "WRONG does not exist" || return 1 + + ceph osd erasure-code-profile set $profile || return 1 + create_pool poolname 12 12 erasure $profile || return 1 + ! ceph osd erasure-code-profile rm $profile > $dir/out 2>&1 || return 1 + grep "poolname.*using.*$profile" $dir/out || return 1 + ceph osd pool delete poolname poolname --yes-i-really-really-mean-it || return 1 + ceph osd erasure-code-profile rm $profile || return 1 + + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_get() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local default_profile=default + ceph osd erasure-code-profile get $default_profile | \ + grep plugin=jerasure || return 1 + ceph --format xml osd erasure-code-profile get $default_profile | \ + grep '<plugin>jerasure</plugin>' || return 1 + ! ceph osd erasure-code-profile get WRONG > $dir/out 2>&1 || return 1 + grep -q "unknown erasure code profile 'WRONG'" $dir/out || return 1 +} + +function TEST_set_idempotent() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + # + # The default profile is set using a code path different from + # ceph osd erasure-code-profile set: verify that it is idempotent, + # as if it was using the same code path. + # + ceph osd erasure-code-profile set default k=2 m=2 2>&1 || return 1 + local profile + # + # Because plugin=jerasure is the default, it uses a slightly + # different code path where defaults (m=1 for instance) are added + # implicitly. + # + profile=profileidempotent1 + ! ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile rm $profile # cleanup + + # + # In the general case the profile is exactly what is on + # + profile=profileidempotent2 + ! ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_format_invalid() { + local dir=$1 + + local profile=profile + # osd_pool_default_erasure-code-profile is + # valid JSON but not of the expected type + run_mon $dir a \ + --osd_pool_default_erasure-code-profile 1 || return 1 + ! 
ceph osd erasure-code-profile set $profile > $dir/out 2>&1 || return 1 + cat $dir/out + grep 'must be a JSON object' $dir/out || return 1 +} + +function TEST_format_json() { + local dir=$1 + + # osd_pool_default_erasure-code-profile is JSON + expected='"plugin":"isa"' + run_mon $dir a \ + --osd_pool_default_erasure-code-profile "{$expected}" || return 1 + ceph --format json osd erasure-code-profile get default | \ + grep "$expected" || return 1 +} + +function TEST_format_plain() { + local dir=$1 + + # osd_pool_default_erasure-code-profile is plain text + expected='"plugin":"isa"' + run_mon $dir a \ + --osd_pool_default_erasure-code-profile "plugin=isa" || return 1 + ceph --format json osd erasure-code-profile get default | \ + grep "$expected" || return 1 +} + +function TEST_profile_k_sanity() { + local dir=$1 + local profile=profile-sanity + + run_mon $dir a || return 1 + + expect_failure $dir 'k must be a multiple of (k + m) / l' \ + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + l=1 \ + k=1 \ + m=1 || return 1 + + if erasure_code_plugin_exists isa ; then + expect_failure $dir 'k=1 must be >= 2' \ + ceph osd erasure-code-profile set $profile \ + plugin=isa \ + k=1 \ + m=1 || return 1 + else + echo "SKIP because plugin isa has not been built" + fi + + expect_failure $dir 'k=1 must be >= 2' \ + ceph osd erasure-code-profile set $profile \ + plugin=jerasure \ + k=1 \ + m=1 || return 1 +} + +function TEST_invalid_crush_failure_domain() { + local dir=$1 + + run_mon $dir a || return 1 + + local profile=ec_profile + local crush_failure_domain=invalid_failure_domain + + ! ceph osd erasure-code-profile set $profile k=4 m=2 crush-failure-domain=$crush_failure_domain 2>&1 || return 1 +} + +main osd-erasure-code-profile "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/osd-erasure-code-profile.sh" +# End: diff --git a/qa/standalone/mon/osd-pool-create.sh b/qa/standalone/mon/osd-pool-create.sh new file mode 100755 index 000000000..6d2c5ad3e --- /dev/null +++ b/qa/standalone/mon/osd-pool-create.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013, 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7105" # git grep '\<7105\>' : there must be only one + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + export CEPH_ARGS + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +# Before http://tracker.ceph.com/issues/8307 the invalid profile was created +function TEST_erasure_invalid_profile() { + local dir=$1 + run_mon $dir a || return 1 + local poolname=pool_erasure + local notaprofile=not-a-valid-erasure-code-profile + ! 
ceph osd pool create $poolname 12 12 erasure $notaprofile || return 1 + ! ceph osd erasure-code-profile ls | grep $notaprofile || return 1 +} + +function TEST_erasure_crush_rule() { + local dir=$1 + run_mon $dir a || return 1 + # + # choose the crush rule used with an erasure coded pool + # + local crush_rule=myrule + ! ceph osd crush rule ls | grep $crush_rule || return 1 + ceph osd crush rule create-erasure $crush_rule + ceph osd crush rule ls | grep $crush_rule + local poolname + poolname=pool_erasure1 + ! ceph --format json osd dump | grep '"crush_rule":1' || return 1 + ceph osd pool create $poolname 12 12 erasure default $crush_rule + ceph --format json osd dump | grep '"crush_rule":1' || return 1 + # + # a crush rule by the name of the pool is implicitly created + # + poolname=pool_erasure2 + ceph osd erasure-code-profile set myprofile + ceph osd pool create $poolname 12 12 erasure myprofile + ceph osd crush rule ls | grep $poolname || return 1 + # + # a non-existent crush rule given as an argument is an error + # http://tracker.ceph.com/issues/9304 + # + poolname=pool_erasure3 + ! ceph osd pool create $poolname 12 12 erasure myprofile INVALIDRULE || return 1 +} + +function TEST_erasure_code_profile_default() { + local dir=$1 + local poolname=pool_default + run_mon $dir a || return 1 + ceph osd erasure-code-profile rm default || return 1 + ! ceph osd erasure-code-profile ls | grep default || return 1 + ceph osd pool create $poolname 12 12 erasure default + ceph osd erasure-code-profile ls | grep default || return 1 +} + +function TEST_erasure_crush_stripe_unit() { + local dir=$1 + # the default stripe unit is used to initialize the pool + run_mon $dir a --public-addr $CEPH_MON + stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit) + eval local $(ceph osd erasure-code-profile get default | grep k=) + stripe_width=$((stripe_unit * k)) + ceph osd pool create pool_erasure 12 12 erasure + ceph --format json osd dump | tee $dir/osd.json + grep '"stripe_width":'$stripe_width $dir/osd.json > /dev/null || return 1 +} + +function TEST_erasure_crush_stripe_unit_padded() { + local dir=$1 + # setting osd_pool_erasure_code_stripe_unit modifies the stripe_width + # and it is padded as required by the default plugin + profile+=" plugin=jerasure" + profile+=" technique=reed_sol_van" + k=4 + profile+=" k=$k" + profile+=" m=2" + actual_stripe_unit=2048 + desired_stripe_unit=$((actual_stripe_unit - 1)) + actual_stripe_width=$((actual_stripe_unit * k)) + run_mon $dir a \ + --osd_pool_erasure_code_stripe_unit $desired_stripe_unit \ + --osd_pool_default_erasure_code_profile "$profile" || return 1 + ceph osd pool create pool_erasure 12 12 erasure + ceph osd dump | tee $dir/osd.json + grep "stripe_width $actual_stripe_width" $dir/osd.json > /dev/null || return 1 +} + +function TEST_erasure_code_pool() { + local dir=$1 + run_mon $dir a || return 1 + ceph --format json osd dump > $dir/osd.json + local expected='"erasure_code_profile":"default"' + !
grep "$expected" $dir/osd.json || return 1 + ceph osd pool create erasurecodes 12 12 erasure + ceph --format json osd dump | tee $dir/osd.json + grep "$expected" $dir/osd.json > /dev/null || return 1 + + ceph osd pool create erasurecodes 12 12 erasure 2>&1 | \ + grep 'already exists' || return 1 + ceph osd pool create erasurecodes 12 12 2>&1 | \ + grep 'cannot change to type replicated' || return 1 +} + +function TEST_replicated_pool_with_rule() { + local dir=$1 + run_mon $dir a + local rule=rule0 + local root=host1 + ceph osd crush add-bucket $root host + local failure_domain=osd + local poolname=mypool + ceph osd crush rule create-simple $rule $root $failure_domain || return 1 + ceph osd crush rule ls | grep $rule + ceph osd pool create $poolname 12 12 replicated $rule || return 1 + rule_id=`ceph osd crush rule dump $rule | grep "rule_id" | awk -F[' ':,] '{print $4}'` + ceph osd pool get $poolname crush_rule 2>&1 | \ + grep "crush_rule: $rule_id" || return 1 + #non-existent crush rule + ceph osd pool create newpool 12 12 replicated non-existent 2>&1 | \ + grep "doesn't exist" || return 1 +} + +function TEST_erasure_code_pool_lrc() { + local dir=$1 + run_mon $dir a || return 1 + + ceph osd erasure-code-profile set LRCprofile \ + plugin=lrc \ + mapping=DD_ \ + layers='[ [ "DDc", "" ] ]' || return 1 + + ceph --format json osd dump > $dir/osd.json + local expected='"erasure_code_profile":"LRCprofile"' + local poolname=erasurecodes + ! grep "$expected" $dir/osd.json || return 1 + ceph osd pool create $poolname 12 12 erasure LRCprofile + ceph --format json osd dump | tee $dir/osd.json + grep "$expected" $dir/osd.json > /dev/null || return 1 + ceph osd crush rule ls | grep $poolname || return 1 +} + +function TEST_replicated_pool() { + local dir=$1 + run_mon $dir a || return 1 + ceph osd pool create replicated 12 12 replicated replicated_rule || return 1 + ceph osd pool create replicated 12 12 replicated replicated_rule 2>&1 | \ + grep 'already exists' || return 1 + # default is replicated + ceph osd pool create replicated1 12 12 || return 1 + # default is replicated, pgp_num = pg_num + ceph osd pool create replicated2 12 || return 1 + ceph osd pool create replicated 12 12 erasure 2>&1 | \ + grep 'cannot change to type erasure' || return 1 +} + +function TEST_no_pool_delete() { + local dir=$1 + run_mon $dir a || return 1 + ceph osd pool create foo 1 || return 1 + ceph tell mon.a injectargs -- --no-mon-allow-pool-delete || return 1 + ! ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1 + ceph tell mon.a injectargs -- --mon-allow-pool-delete || return 1 + ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1 +} + +function TEST_utf8_cli() { + local dir=$1 + run_mon $dir a || return 1 + # Hopefully it's safe to include literal UTF-8 characters to test + # the fix for http://tracker.ceph.com/issues/7387. 
If it turns out + # to not be OK (when is the default encoding *not* UTF-8?), maybe + # the character '黄' can be replaced with the escape $'\xe9\xbb\x84' + OLDLANG="$LANG" + export LANG=en_US.UTF-8 + ceph osd pool create 黄 16 || return 1 + ceph osd lspools 2>&1 | \ + grep "黄" || return 1 + ceph -f json-pretty osd dump | \ + python3 -c "import json; import sys; json.load(sys.stdin)" || return 1 + ceph osd pool delete 黄 黄 --yes-i-really-really-mean-it + export LANG="$OLDLANG" +} + +function check_pool_priority() { + local dir=$1 + shift + local pools=$1 + shift + local spread="$1" + shift + local results="$1" + + setup $dir || return 1 + + EXTRA_OPTS="--debug_allow_any_pool_priority=true" + export EXTRA_OPTS + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + # Add pool 0 too + for i in $(seq 0 $pools) + do + num=$(expr $i + 1) + ceph osd pool create test${num} 1 1 + done + + wait_for_clean || return 1 + for i in $(seq 0 $pools) + do + num=$(expr $i + 1) + ceph osd pool set test${num} recovery_priority $(expr $i \* $spread) + done + + #grep "recovery_priority.*pool set" out/mon.a.log + + bin/ceph osd dump + + # Restart everything so mon converts the priorities + kill_daemons + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + sleep 5 + + grep convert $dir/mon.a.log + ceph osd dump + + pos=1 + for i in $(ceph osd dump | grep ^pool | sed 's/.*recovery_priority //' | awk '{ print $1 }') + do + result=$(echo $results | awk "{ print \$${pos} }") + # A value of 0 is an unset value so sed/awk gets "pool" + if test $result = "0" + then + result="pool" + fi + test "$result" = "$i" || return 1 + pos=$(expr $pos + 1) + done +} + +function TEST_pool_pos_only_prio() { + local dir=$1 + check_pool_priority $dir 20 5 "0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10" || return 1 +} + +function TEST_pool_neg_only_prio() { + local dir=$1 + check_pool_priority $dir 20 -5 "0 0 -1 -1 -2 -2 -3 -3 -4 -4 -5 -5 -6 -6 -7 -7 -8 -8 -9 -9 -10" || return 1 +} + +function TEST_pool_both_prio() { + local dir=$1 + check_pool_priority $dir 20 "5 - 50" "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10" || return 1 +} + +function TEST_pool_both_prio_no_neg() { + local dir=$1 + check_pool_priority $dir 20 "2 - 4" "-4 -2 0 0 1 1 2 2 3 3 4 5 5 6 6 7 7 8 8 9 10" || return 1 +} + +function TEST_pool_both_prio_no_pos() { + local dir=$1 + check_pool_priority $dir 20 "2 - 36" "-10 -9 -8 -8 -7 -7 -6 -6 -5 -5 -4 -3 -3 -2 -2 -1 -1 0 0 2 4" || return 1 +} + + +main osd-pool-create "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/osd-pool-create.sh" +# End: diff --git a/qa/standalone/mon/osd-pool-df.sh b/qa/standalone/mon/osd-pool-df.sh new file mode 100755 index 000000000..d2b80ec72 --- /dev/null +++ b/qa/standalone/mon/osd-pool-df.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Tencent <contact@tencent.com> +# +# Author: Chang Liu <liuchang0812@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_ceph_df() { + local dir=$1 + setup $dir || return 1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + run_mgr $dir x || return 1 + + profile+=" plugin=jerasure" + profile+=" technique=reed_sol_van" + profile+=" k=4" + profile+=" m=2" + profile+=" crush-failure-domain=osd" + + ceph osd erasure-code-profile set ec42profile ${profile} + + local rep_poolname=testcephdf_replicate + local ec_poolname=testcephdf_erasurecode + create_pool $rep_poolname 6 6 replicated + create_pool $ec_poolname 6 6 erasure ec42profile + flush_pg_stats + + local global_avail=`ceph df -f json | jq '.stats.total_avail_bytes'` + local rep_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$rep_poolname'"))[0].stats.max_avail'` + local ec_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$ec_poolname'"))[0].stats.max_avail'` + + echo "${global_avail} >= ${rep_avail}*3" | bc || return 1 + echo "${global_avail} >= ${ec_avail}*1.5" | bc || return 1 + + ceph osd pool delete $rep_poolname $rep_poolname --yes-i-really-really-mean-it + ceph osd pool delete $ec_poolname $ec_poolname --yes-i-really-really-mean-it + ceph osd erasure-code-profile rm ec42profile + teardown $dir || return 1 +} + +main osd-pool-df "$@" diff --git a/qa/standalone/mon/test_pool_quota.sh b/qa/standalone/mon/test_pool_quota.sh new file mode 100755 index 000000000..b87ec2232 --- /dev/null +++ b/qa/standalone/mon/test_pool_quota.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# +# Generic pool quota test +# + +# Includes + + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:17108" # git grep '\<17108\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +function TEST_pool_quota() { + local dir=$1 + setup $dir || return 1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + local poolname=testquota + create_pool $poolname 20 + local objects=`ceph df detail | grep -w $poolname|awk '{print $3}'` + local bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'` + + echo $objects + echo $bytes + if [ $objects != 'N/A' ] || [ $bytes != 'N/A' ] ; + then + return 1 + fi + + ceph osd pool set-quota $poolname max_objects 1000 + ceph osd pool set-quota $poolname max_bytes 1024 + + objects=`ceph df detail | grep -w $poolname|awk '{print $3}'` + bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'` + + if [ $objects != '1000' ] || [ $bytes != '1K' ] ; + then + return 1 + fi + + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it + teardown $dir || return 1 +} + +main 
testpoolquota diff --git a/qa/standalone/osd-backfill/osd-backfill-prio.sh b/qa/standalone/osd-backfill/osd-backfill-prio.sh new file mode 100755 index 000000000..9749ca34c --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-prio.sh @@ -0,0 +1,522 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + # Set osd op queue = wpq for the tests. Backfill priority is not + # considered by mclock_scheduler leading to unexpected results. + CEPH_ARGS+="--osd-op-queue=wpq " + export objects=50 + export poolprefix=test + export FORCE_PRIO="254" # See OSD_BACKFILL_PRIORITY_FORCED + export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10 + export NORMAL_PRIO="110" # See OSD_BACKFILL_PRIORITY_BASE + 10 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_backfill_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + local degraded_prio=$(expr $DEGRADED_PRIO + 1) + local max_tries=10 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. 
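The assertions in this test repeatedly parse the OSD's dump_recovery_reservations admin-socket output with jq. A minimal standalone sketch of that query (the socket-path helper, the CEPH_ARGS='' prefix and the output shape are taken from how the test itself uses them; the OSD id here is illustrative only):

    asok=$(get_asok_path osd.0)   # same helper the test uses; any running OSD id works
    CEPH_ARGS='' ceph --admin-daemon "$asok" dump_recovery_reservations | jq '{
      in_progress: .local_reservations.in_progress[0],
      queued: [.local_reservations.queues[].items[] | {item, prio}]
    }'

The test then compares the reported prio values against the FORCE_PRIO, DEGRADED_PRIO and NORMAL_PRIO constants exported in run().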
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "$pool3" = "" ]; + then + echo "Failure to find appropriate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool3 size 1 --yes-i-really-mean-it + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set nobackfill + ceph osd set noout + + # Get a pg to want to backfill and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-backfill" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1.
Item is queued, re-queue with new priority + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-backfill" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress, if higher priority items waiting prempt item + ceph pg cancel-force-backfill $PG3 || return 1 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-backfill $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-backfill $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset nobackfill + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to the backfill priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start backfill by changing the pool sizes from 1 to 2 +# Use dump_recovery_reservations to verify priorities +function TEST_backfill_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. 
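The priorities asserted below follow from the constants exported in run(); a quick worked example restating the comments there (a sketch only: DEGRADED_PRIO=150, and dropping a pool from size 2 to size 1 degrades it by one replica):

    degraded_prio=$(( 150 + 1 ))     # DEGRADED_PRIO + 1 for the one missing replica
    echo $(( degraded_prio + 1 ))    # 152: expected prio for the pool with recovery_priority 1
    echo $(( degraded_prio + 2 ))    # 153: expected prio for the pool with recovery_priority 2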
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + pool1_prio=$(expr $DEGRADED_PRIO + 1 + $pool1_extra_prio) + pool2_prio=$(expr $DEGRADED_PRIO + 1 + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG ${PG2} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != 
$pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-backfill-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh new file mode 100755 index 000000000..f9a144932 --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? 
+ export CEPH_MON="127.0.0.1:7129" # git grep '\<7129\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function _common_test() { + local dir=$1 + local extra_opts="$2" + local loglen="$3" + local dupslen="$4" + local objects="$5" + local moreobjects=${6:-0} + + local OSDS=6 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + export EXTRA_OPTS=" $extra_opts" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + create_pool test 1 1 + + for j in $(seq 1 $objects) + do + rados -p test put obj-${j} /etc/passwd + done + + # Mark out all OSDs for this pool + ceph osd out $(ceph pg dump pgs --format=json | jq '.pg_stats[0].up[]') + if [ "$moreobjects" != "0" ]; then + for j in $(seq 1 $moreobjects) + do + rados -p test put obj-more-${j} /etc/passwd + done + fi + sleep 1 + wait_for_clean + + flush_pg_stats + + newprimary=$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary') + kill_daemons + + ERRORS=0 + _objectstore_tool_nodown $dir $newprimary --no-mon-config --pgid 1.0 --op log | tee $dir/result.log + LOGLEN=$(jq '.pg_log_t.log | length' $dir/result.log) + if [ $LOGLEN != "$loglen" ]; then + echo "FAILED: Wrong log length got $LOGLEN (expected $loglen)" + ERRORS=$(expr $ERRORS + 1) + fi + DUPSLEN=$(jq '.pg_log_t.dups | length' $dir/result.log) + if [ $DUPSLEN != "$dupslen" ]; then + echo "FAILED: Wrong dups length got $DUPSLEN (expected $dupslen)" + ERRORS=$(expr $ERRORS + 1) + fi + grep "copy_up_to\|copy_after" $dir/osd.*.log + rm -f $dir/result.log + if [ $ERRORS != "0" ]; then + echo TEST FAILED + return 1 + fi +} + + +# Cause copy_up_to() to only partially copy logs, copy additional dups, and trim dups +function TEST_backfill_log_1() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2 --osd_pg_log_dups_tracked=10" 2 8 150 +} + + +# Cause copy_up_to() to only partially copy logs, copy additional dups +function TEST_backfill_log_2() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2" 2 148 150 +} + + +# Cause copy_after() to only copy logs, no dups +function TEST_recovery_1() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=50 --osd_max_pg_log_entries=50 --osd_pg_log_dups_tracked=60 --osd_pg_log_trim_min=10" 40 0 40 +} + + +# Cause copy_after() to copy logs with dups +function TEST_recovery_2() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=150 --osd_max_pg_log_entries=150 --osd_pg_log_dups_tracked=3000 --osd_pg_log_trim_min=10" 151 10 141 20 +} + +main osd-backfill-recovery-log "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-recovery-log.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-space.sh b/qa/standalone/osd-backfill/osd-backfill-space.sh new file mode 100755 index 000000000..6a5c69412 --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-space.sh @@ -0,0 +1,1176 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it 
and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + CEPH_ARGS+="--fake_statfs_for_testing=3686400 " + CEPH_ARGS+="--osd_max_backfills=10 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_not_state() { + local state=$1 + local num_in_state=-1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state = "0" && break + if test $cur_in_state != $num_in_state ; then + loop=0 + num_in_state=$cur_in_state + elif (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_not_backfilling() { + local timeout=$1 + wait_for_not_state backfilling $timeout +} + + +function wait_for_not_activating() { + local timeout=$1 + wait_for_not_state activating $timeout +} + +# All tests are created in an environment which has fake total space +# of 3600K (3686400) which can hold 600 6K replicated objects or +# 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool +# we have a theoretical 54K object but with the chunk size of 4K +# and a rounding of 4K to account for the chunks is 36K max object +# which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of +# 3600K for a shard. + +# Create 2 pools with size 1 +# Write enough data that only 1 pool pg can fit per osd +# Incresase the pool size to 2 +# On 3 OSDs this should result in 1 OSD with overlapping replicas, +# so both pools can't fit. We assume pgid 1.0 and 2.0 won't +# map to the same 2 OSDs. +# At least 1 pool shouldn't have room to backfill +# All other pools should go active+clean +function TEST_backfill_test_simple() { + local dir=$1 + local pools=2 + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + # This won't work is if the 2 pools primary and only osds + # are the same. 
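+    # Editorial sketch (illustrative only, not part of the original test logic): the
+    # space math behind "only 1 pool pg can fit per osd".  With
+    # fake_statfs_for_testing=3686400 (3600K) and each 4K object assumed to consume
+    # roughly 6K once replication overhead is counted, 600 objects per pg fill an OSD
+    # completely, so a second pg cannot also backfill there under the 0.85
+    # backfillfull ratio set above.  The echo below is a harmless sanity print.
+    echo "space sketch: 600 * 6144 = $((600 * 6144)) of 3686400 fake bytes"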
+ + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + expected="$(expr $pools - 1)" + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ]; + then + echo "$expected didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + + +# Create 8 pools of size 1 on 20 OSDs +# Write 4K * 600 objects (only 1 pool pg can fit on any given osd) +# Increase pool size to 2 +# At least 1 pool shouldn't have room to backfill +# All other pools should go active+clean +function TEST_backfill_test_multi() { + local dir=$1 + local pools=8 + local OSDS=20 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" + if [ "$full" -lt "1" ]; + then + echo "At least one pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + expected="$(expr $pools - $full)" + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ]; + then + echo "$expected didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_WARN" ]; then + echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then + echo "PG_BACKFILL_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + # Work around for http://tracker.ceph.com/issues/38195 + kill_daemons $dir #|| return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + + +# To make sure that when 2 pg try to backfill at the same time to +# the same target. 
This might be covered by the simple test above +# but this makes sure we get it. +# +# Create 10 pools of size 2 and identify 2 that have the same +# non-primary osd. +# Delete all other pools +# Set size to 1 and write 4K * 600 to each pool +# Set size back to 2 +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_backfill_test_sametarget() { + local dir=$1 + local pools=10 + local OSDS=5 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with a pg that distinct primaries but second + # replica on the same osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1 + local chk_osd2 + + local PG2 + local POOLNUM2 + local pool2 + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ $p = "1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1=$test_osd1 + chk_osd2=$test_osd2 + elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for i in $(seq 1 $objects) + do + rados -p $pool1 put obj$i $dir/datafile + rados -p $pool2 put obj$i $dir/datafile + done + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + ! 
grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# 2 pools can't both backfill to a target which has other data +# 1 of the pools has objects that increase from 1024 to 2611 bytes +# +# Write to fill pool which is size 1 +# Take fill pool osd down (other 2 pools must go to the remaining OSDs +# Save an export of data on fill OSD and restart it +# Write an intial 1K to pool1 which has pg 2.0 +# Export 2.0 from non-fillpool OSD don't wait for it to start-up +# Take down fillpool OSD +# Put 1K object version of 2.0 on fillpool OSD +# Put back fillpool data on fillpool OSD +# With fillpool down write 2611 byte objects +# Take down $osd and bring back $fillosd simultaneously +# Wait for backfilling +# One PG will be able to backfill its remaining data +# One PG must get backfill_toofull +function TEST_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + + wait_for_clean || return 1 + + # Partially fill an osd + # We have room for 600 6K replicated objects, if we create 2611 byte objects + # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one + # replica from the other 2 is 85% of 3600K + + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj-fill-${o} $dir/datafile + done + + local fillosd=$(get_primary fillpool obj-fill-1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + + _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1 + activate_osd $dir $fillosd || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile + done + + ceph pg dump pgs + # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time + _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out + kill_daemon $dir/osd.$fillosd.pid TERM + _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1 + ceph pg dump pgs + sleep 20 + ceph pg dump pgs + + # re-write everything + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + flush_pg_stats || return 1 + ceph pg dump pgs + + ERRORS=0 + if [ "$(get_num_in_state backfill_toofull)" != "1" ]; + then + echo "One PG should be in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(get_num_in_state 
active+clean)" != "2" ]; + then + echo "Two PGs should be active+clean after one PG completed backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Make sure that the amount of bytes already on the replica doesn't +# cause an out of space condition +# +# Create 1 pool and write 4K * 600 objects +# Remove 25% (150) of the objects with one OSD down (noout set) +# Increase the size of the remaining 75% (450) of the objects to 6K +# Bring back down OSD +# The pool should go active+clean +function TEST_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + sleep 5 + + wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4 + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i $dir/4kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $objects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1 + for i in $(seq $(expr $rmobjects + 1) $objects) + do + rados -p $poolname put obj$i $dir/6kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Create a 5 shard EC pool on 6 OSD cluster +# Fill 1 OSD with 2600K of data take that osd down. +# Write the EC pool on 5 OSDs +# Take down 1 (must contain an EC shard) +# Bring up OSD with fill data +# Not enought room to backfill to partially full OSD +function TEST_ec_backfill_simple() { + local dir=$1 + local EC=$2 + local pools=1 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 13K objects + # there is only 3600K - (13K * 200) = 1000K which won't hold + # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K + # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which + # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical + # bytes in the pool. 
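+    # Editorial arithmetic check for the comment above (the echo is illustrative only
+    # and nothing in the test consumes it): filling with 200 x 13K objects leaves
+    # 3600K - 2600K = 1000K free, while a single k=3 shard needs between
+    # 8K * 200 = 1600K (actual, after rounding 6K chunks up to 8K) and
+    # ((18K / 3) + 4K) * 200 = 2000K (ceiling), so it cannot backfill here.
+    echo "ec sketch: free=$((3686400 - 13 * 1024 * 200)) shard_min=$((8 * 1024 * 200)) shard_max=$(( (18 * 1024 / 3 + 4096) * 200 ))"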
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=13 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=18 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function osdlist() { + local OSDS=$1 + local excludeosd=$2 + + osds="" + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if [ $osd = $excludeosd ]; + then + continue + fi + if [ -n "$osds" ]; then + osds="${osds} " + fi + osds="${osds}${osd}" + done + echo $osds +} + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. +# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_ec_backfill_multi() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
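+    # Editorial arithmetic for the comment above (illustrative echo, unused by the
+    # test): 200 x 9K fill objects leave 3600K - 1800K = 1800K free, which is enough
+    # for one k=3 shard at ((12K / 3) + 4K) * 200 = 1600K but not for two; that is
+    # exactly the race this test relies on.
+    echo "ec multi sketch: free=$((3686400 - 9 * 1024 * 200)) one_shard=$(( (12 * 1024 / 3 + 4096) * 200 ))"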
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd + done + + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Similar to TEST_ec_backfill_multi but one of the ec pools +# already had some data on the target OSD + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. +# Write a small amount of data to 1 EC pool that still includes the filled one +# Take down fillosd with noout set +# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=5 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + local lastosd=$(expr $OSDS - 1) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + # last osd + ceph osd pg-upmap 1.0 $lastosd + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
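+    # Editorial aside: the SKIP_ prefix keeps this test out of run()'s auto-discovery,
+    # which only picks up names matching TEST_*; it can still be invoked explicitly by
+    # passing the function name on the command line.  The harmless line below just
+    # prints the names auto-discovery would find, using the same pattern run() uses.
+    set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p'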
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $ecobjects) + do + rados -p "${poolprefix}1" put obj$o-1 $dir/datafile + done + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1)) + done + ceph pg dump pgs + + #ceph osd set noout + #kill_daemons $dir TERM osd.$lastosd || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + # Now backfill lastosd by adding back into the upmap + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + #activate_osd $dir $lastosd || return 1 + #ceph tell osd.0 debug kick_recovery_wq 0 + + sleep 30 + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Below we need to fit 3200K in 3600K which is 88% + # so set to 90% + ceph osd set-backfillfull-ratio .90 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 48K ec objects, if we create 4k replicated objects + # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard + # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each. + # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K. 
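+    # Editorial arithmetic for the comment above (illustrative echo only): 200 x 4K
+    # replicated fill objects leave 3600K - 800K = 2800K, two k=3 shards need
+    # 2 * ((12K / 3) + 4K) * 200 = 3200K, and 3200K / 3600K is roughly 88%, which is
+    # why this test raises the backfillfull ratio to 0.90 instead of the usual 0.85.
+    echo "ec partial sketch: free=$((3686400 - 4 * 1024 * 200)) two_shards=$((2 * (12 * 1024 / 3 + 4096) * 200))"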
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + #ceph pg map 2.0 --format=json | jq '.' + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Create 1 EC pool +# Write 200 12K objects ((12K / 3) + 4K) *200) = 1600K +# Take 1 shard's OSD down (with noout set) +# Remove 50 objects ((12K / 3) + 4k) * 50) = 400K +# Write 150 36K objects (grow 150 objects) 2400K +# But there is already 1600K usage so backfill +# would be too full if it didn't account for existing data +# Bring back down OSD so it must backfill +# It should go active+clean taking into account data already there +function TEST_ec_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + ceph osd pool create $poolname 1 1 erasure ec-profile + + wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12 + for i in $(seq 1 $ecobjects) + do + rados -p $poolname put obj$i $dir/12kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $ecobjects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36 + for i in $(seq $(expr 
$rmobjects + 1) $ecobjects) + do + rados -p $poolname put obj$i $dir/36kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main osd-backfill-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-stats.sh b/qa/standalone/osd-backfill/osd-backfill-stats.sh new file mode 100755 index 000000000..21b42a4ce --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-stats.sh @@ -0,0 +1,761 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function below_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 )) +} + +function above_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check >= $target && $check <= $target + $margin ? 
0 : 1 )) +} + +FIND_UPACT='grep "pg[[]${PG}.*backfilling.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"' +FIND_FIRST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"' +FIND_LAST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"' + +function check() { + local dir=$1 + local PG=$2 + local primary=$3 + local type=$4 + local degraded_start=$5 + local degraded_end=$6 + local misplaced_start=$7 + local misplaced_end=$8 + local primary_start=${9:-} + local primary_end=${10:-} + local check_setup=${11:-true} + + local log=$(grep -l +backfilling $dir/osd.$primary.log) + if [ $check_setup = "true" ]; + then + local alllogs=$(grep -l +backfilling $dir/osd.*.log) + if [ "$(echo "$alllogs" | wc -w)" != "1" ]; + then + echo "Test setup failure, a single OSD should have performed backfill" + return 1 + fi + fi + + local addp=" " + if [ "$type" = "erasure" ]; + then + addp="p" + fi + + UPACT=$(eval $FIND_UPACT) + [ -n "$UPACT" ] || return 1 + + # Check 3rd line at start because of false recovery starts + local which="degraded" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $degraded_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $degraded_end || return 1 + + # Check 3rd line at start because of false recovery starts + which="misplaced" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $misplaced_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $misplaced_end || return 1 + + # This is the value of set into MISSING_ON_PRIMARY + if [ -n "$primary_start" ]; + then + which="shard $primary" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $primary_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $primary_end || return 1 + fi +} + +# [1] -> [1, 0, 2] +# degraded 1000 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:44:23.531466 22'500 26:617 [1,0,2] 1 [1] 1 0'0 2017-10-27 09:43:44.654882 0'0 2017-10-27 09:43:44.654882 +function TEST_backfill_sizeup() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + ceph osd set nobackfill + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + + +# [1] -> [0, 2, 4] +# degraded 1000 -> 0 +# misplaced 500 -> 0 +# state: 
active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 500 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:48:53.326849 22'500 26:603 [0,2,4] 0 [1] 1 0'0 2017-10-27 09:48:13.236253 0'0 2017-10-27 09:48:13.236253 +function TEST_backfill_sizeup_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$primary + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1 0] -> [1,2]/[1,0] +# misplaced 500 -> 0 +# state: active+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-10-27 09:51:18.800517 22'500 25:570 [1,2] 1 [1,0] 1 0'0 2017-10-27 09:50:40.441274 0'0 2017-10-27 09:50:40.441274 +function TEST_backfill_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$(get_not_primary $poolname obj1) + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + check $dir $PG $primary replicated 0 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [0, 1] -> [0, 2]/[0] +# osd 1 down/out +# degraded 500 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:53:24.051091 22'500 27:719 [0,2] 0 [0] 0 0'0 2017-10-27 09:52:43.188368 0'0 2017-10-27 09:52:43.188368 +function TEST_backfill_down_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || 
return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + check $dir $PG $primary replicated $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1, 0] -> [2, 3, 4] +# degraded 500 -> 0 +# misplaced 1000 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:55:50.375722 23'500 27:553 [2,4,3] 2 [1,0] 1 0'0 2017-10-27 09:55:10.230919 0'0 2017-10-27 09:55:10.230919 +function TEST_backfill_out2() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd pool set $poolname size 3 + ceph osd out osd.${otherosd} + ceph osd out osd.${primary} + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + + check $dir $PG $primary replicated $objects 0 $misplaced 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [0,1] -> [2,4,3]/[0,1] +# degraded 1000 -> 0 +# misplaced 1000 -> 500 +# state ends at active+clean+remapped [2,4,3]/[2,4,3,0] +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-30 18:21:45.995149 19'500 23:1817 [2,4,3] 2 [0,1] 0 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904 +# ENDS: +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-30 18:22:42.293730 19'500 25:2557 [2,4,3] 2 [2,4,3,0] 2 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904 +function TEST_backfill_sizeup4_allout() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 
+ run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$otherosd + ceph osd out osd.$primary + ceph osd pool set $poolname size 4 + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misdeg=$(expr $objects \* 2) + check $dir $PG $primary replicated $misdeg 0 $misdeg $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,2,0] -> [3]/[1,2] +# misplaced 1000 -> 500 +# state ends at active+clean+remapped [3]/[3,1] +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 1000 0 0 100 100 active+remapped+backfilling 2017-11-28 19:13:56.092439 21'500 31:790 [3] 3 [1,2] 1 0'0 2017-11-28 19:13:28.698661 0'0 2017-11-28 19:13:28.698661 +function TEST_backfill_remapped() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.${otherosd} + for i in $(get_osds $poolname obj1) + do + if [ $i = $primary -o $i = $otherosd ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd out osd.${primary} + ceph osd pool set $poolname size 2 + sleep 2 + + # primary may change due to invalidating the old pg_temp, which was [1,2,0], + # but up_primary (3) chooses [0,1] for acting. 
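+    # Editorial debugging aid (read-only query, safe no-op for the test): show the
+    # up/acting sets the mon reports at this point, which is where the re-read of the
+    # primary just below comes from.
+    ceph pg map $PG --format=json | jq '{up: .up, acting: .acting}' || true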
+ primary=$(get_primary $poolname obj1) + + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + + check $dir $PG $primary replicated 0 0 $misplaced $objects "" "" false || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0,2] -> [4,3,NONE]/[1,0,2] +# misplaced 1500 -> 500 +# state ends at active+clean+remapped [4,3,NONE]/[4,3,2] + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 1500 0 0 100 100 active+degraded+remapped+backfilling 2017-10-31 16:53:39.467126 19'500 23:615 [4,3,NONE] 4 [1,0,2] 1 0'0 2017-10-31 16:52:59.624429 0'0 2017-10-31 16:52:59.624429 + + +# ENDS: + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-31 16:48:34.414040 19'500 25:2049 [4,3,NONE] 4 [4,3,2] 4 0'0 2017-10-31 16:46:58.203440 0'0 2017-10-31 16:46:58.203440 +function TEST_backfill_ec_all_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + for o in $(get_osds $poolname obj1) + do + ceph osd out osd.$o + done + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 3) + check $dir $PG $primary erasure 0 0 $misplaced $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,0,2] -> [4, 0, 2] +# misplaced 500 -> 0 +# active+remapped+backfilling +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-11-08 18:05:39.036420 24'500 27:742 [4,0,2] 4 [1,0,2] 1 0'0 2017-11-08 18:04:58.697315 0'0 2017-11-08 18:04:58.697315 +function TEST_backfill_ec_prim_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local 
PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$primary + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 3) + check $dir $PG $primary erasure 0 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0] -> [1,2] +# degraded 500 -> 0 +# misplaced 1000 -> 0 +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 14:02:29.439105 24'500 29:1020 [4,3,5] 4 [1,NONE,2] 1 0'0 2017-11-06 14:01:46.509963 0'0 2017-11-06 14:01:46.509963 +function TEST_backfill_ec_down_all_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + local allosds=$(get_osds $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + for o in $allosds + do + ceph osd out osd.$o + done + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + flush_pg_stats + + # Wait for recovery to finish + # Can't use wait_for_clean() because state goes from active+undersized+degraded+remapped+backfilling + # to active+undersized+remapped + while(true) + do + if test "$(ceph --format json pg dump pgs | + jq '.pg_stats | [.[] | .state | select(. 
== "incomplete")] | length')" -ne "0" + then + sleep 2 + continue + fi + break + done + ceph pg dump pgs + for i in $(seq 1 240) + do + if ceph pg dump pgs | grep ^$PG | grep -qv backfilling + then + break + fi + if [ $i = "240" ]; + then + echo "Timeout waiting for recovery to finish" + return 1 + fi + sleep 1 + done + + ceph pg dump pgs + + local misplaced=$(expr $objects \* 2) + check $dir $PG $primary erasure $objects 0 $misplaced 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,0,2] -> [1,3,2] +# degraded 500 -> 0 +# active+backfilling+degraded +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 13:57:25.412322 22'500 28:794 [1,3,2] 1 [1,NONE,2] 1 0'0 2017-11-06 13:54:58.033906 0'0 2017-11-06 13:54:58.033906 +function TEST_backfill_ec_down_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + check $dir $PG $primary erasure $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main osd-backfill-stats "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-stats.sh" +# End: diff --git a/qa/standalone/osd/bad-inc-map.sh b/qa/standalone/osd/bad-inc-map.sh new file mode 100755 index 000000000..cc3cf27cc --- /dev/null +++ b/qa/standalone/osd/bad-inc-map.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +mon_port=$(get_unused_port) + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:$mon_port" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + set -e + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bad_inc_map() { + local dir=$1 + + run_mon $dir a + run_mgr $dir x + run_osd $dir 0 + run_osd $dir 1 + run_osd $dir 2 + + ceph config set osd.2 osd_inject_bad_map_crc_probability 1 + + # osd map churn + create_pool foo 8 + ceph osd pool set foo min_size 1 + ceph osd pool set foo min_size 2 + + sleep 5 + + # make sure all the OSDs are still up 
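+    # despite osd.2 having been fed incremental maps with deliberately bad CRCs.
+    # Editorial, read-only sanity query (illustrative, has no effect on the test):
+    # confirm the injection knob is still set before checking liveness.
+    ceph config get osd.2 osd_inject_bad_map_crc_probability || true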
+ TIMEOUT=10 wait_for_osd up 0 + TIMEOUT=10 wait_for_osd up 1 + TIMEOUT=10 wait_for_osd up 2 + + # check for the signature in the log + grep "injecting map crc failure" $dir/osd.2.log || return 1 + grep "bailing because last" $dir/osd.2.log || return 1 + + echo success + + delete_pool foo + kill_daemons $dir || return 1 +} + +main bad-inc-map "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh bad-inc-map.sh" +# End: diff --git a/qa/standalone/osd/divergent-priors.sh b/qa/standalone/osd/divergent-priors.sh new file mode 100755 index 000000000..40d72544d --- /dev/null +++ b/qa/standalone/osd/divergent-priors.sh @@ -0,0 +1,855 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # This should multiple of 6 + export loglen=12 + export divisor=3 + export trim=$(expr $loglen / 2) + export DIVERGENT_WRITE=$(expr $trim / $divisor) + export DIVERGENT_REMOVE=$(expr $trim / $divisor) + export DIVERGENT_CREATE=$(expr $trim / $divisor) + export poolname=test + export testobjects=100 + # Fix port???? + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + CEPH_ARGS+="--osd_debug_pg_log_writeout=true " + CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +# Special case divergence test +# Test handling of divergent entries with prior_version +# prior to log_tail +# based on qa/tasks/divergent_prior.py +function TEST_divergent() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i 
$dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + local case5=$testobjects + local case3=$(expr $testobjects - 1) + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$case5 $dummyfile & + echo 'create missing divergent object' + inject_eio rep data $poolname existing_$case3 $dir 0 || return 1 + rados -p $poolname get existing_$case3 $dir/existing & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #5 + if ! 
grep -q "_merge_object_divergent_entries.*cannot roll back, removing and adding to missing" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_divergent_ec() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_ec_pool $poolname true k=2 m=1 || return 1 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$testobjects $dummyfile2 & + sleep 1 + rados -p $poolname put existing_$testobjects $dummyfile & + rados -p $poolname mksnap snap1 + rados -p $poolname put existing_$(expr $testobjects - 1) $dummyfile & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + sleep 5 + #WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # Dump logs + for i in $non_divergent + do + kill_daemons $dir KILL osd.$i || return 1 + _objectstore_tool_nodown $dir $i --op log --pgid $pgid + activate_osd $dir $i || return 1 + done + _objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd 
unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #3 + # XXX: Not reproducing this case +# if ! grep -q "_merge_object_divergent_entries.* missing, .* adjusting" $(find $dir -name '*osd*log') +# then +# echo failure +# return 1 +# fi + # Check for _merge_object_divergent_entries for case #4 + if ! grep -q "_merge_object_divergent_entries.*rolled back" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# Special case divergence test with ceph-objectstore-tool export/remove/import +# Test handling of divergent entries with prior_version +# prior to log_tail and a ceph-objectstore-tool export/import +# based on qa/tasks/divergent_prior2.py +function TEST_divergent_2() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm 
existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! 
grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# this is the same as case _2 above, except we enable pg autoscaling in order +# to reproduce https://tracker.ceph.com/issues/41816 +function TEST_divergent_3() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + # reproduce https://tracker.ceph.com/issues/41816 + ceph osd pool set $poolname pg_autoscale_mode on + + divergent=-1 + start_time=$(date +%s) + max_duration=300 + + while [ "$divergent" -le -1 ] + do + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + + current_time=$(date +%s) + elapsed_time=$(expr $current_time - $start_time) + if [ "$elapsed_time" -gt "$max_duration" ]; then + echo "timed out waiting for divergent" + return 1 + fi + done + + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no 
recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main divergent-priors "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh divergent-priors.sh" +# End: diff --git a/qa/standalone/osd/ec-error-rollforward.sh b/qa/standalone/osd/ec-error-rollforward.sh new file mode 100755 index 000000000..621e6b13f --- /dev/null +++ b/qa/standalone/osd/ec-error-rollforward.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? 
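+    # Every standalone suite pins its monitor to a distinct port; as the
+    # note below says, `git grep` the port number first so that no other
+    # script under qa/standalone reuses it.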
+ export CEPH_MON="127.0.0.1:7132" # git grep '\<7132\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_ec_error_rollforward() { + local dir=$1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd + ceph osd pool create ec 1 1 erasure ec-profile + + rados -p ec put foo /etc/passwd + + kill -STOP $(cat $dir/osd.2.pid) + + rados -p ec rm foo & + pids="$!" + sleep 1 + rados -p ec rm a & + pids+=" $!" + rados -p ec rm b & + pids+=" $!" + rados -p ec rm c & + pids+=" $!" + sleep 1 + # Use SIGKILL so stopped osd.2 will terminate + # and kill_daemons waits for daemons to die + kill_daemons $dir KILL osd + kill $pids + wait + + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + activate_osd $dir 3 || return 1 + + wait_for_clean || return 1 +} + +main ec-error-rollforward "$@" diff --git a/qa/standalone/osd/osd-bench.sh b/qa/standalone/osd/osd-bench.sh new file mode 100755 index 000000000..eb1a6a440 --- /dev/null +++ b/qa/standalone/osd/osd-bench.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7106" # git grep '\<7106\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-bluestore 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bench() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_bench_small_size_max_iops=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_small_size_max_iops) + local osd_bench_large_size_max_throughput=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_large_size_max_throughput) + local osd_bench_max_block_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_max_block_size) + local osd_bench_duration=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_duration) + + # + # block size too high + # + expect_failure $dir osd_bench_max_block_size \ + ceph tell osd.0 bench 1024 $((osd_bench_max_block_size + 1)) || return 1 + + # + # count too high for small (< 1MB) block sizes + # + local bsize=1024 + local max_count=$(($bsize * $osd_bench_duration * $osd_bench_small_size_max_iops)) + expect_failure $dir bench_small_size_max_iops \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # count too high for large (>= 1MB) block sizes + # + local bsize=$((1024 * 1024 + 1)) + local max_count=$(($osd_bench_large_size_max_throughput * $osd_bench_duration)) + expect_failure $dir osd_bench_large_size_max_throughput \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # default values should work + # + ceph tell osd.0 bench || return 1 + + # + # test object_size < block_size + ceph tell osd.0 bench 10 14456 4444 3 + # + + # + # test object_size < block_size & object_size = 0(default value) + # + ceph tell osd.0 bench 1 14456 +} + +main osd-bench "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh new file mode 100755 index 000000000..aedfbc9b5 --- /dev/null +++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh @@ -0,0 +1,497 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +[ `uname` = FreeBSD ] && exit 0 + +function run() { + local dir=$1 + shift + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bluestore() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
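+        # (assumption) the four OSDs in this test each keep block, block.db
+        # and block.wal open on top of rocksdb and log files, which can
+        # exhaust the common 1024-fd default; 1536 leaves some headroom.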
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=2147483648 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_size=536870912 " + CEPH_ARGS+="--bluestore_block_wal_create=true " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + run_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + run_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + run_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + echo "after bench" + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # expand slow devices + ceph-bluestore-tool --path $dir/0 fsck || return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + truncate $dir/0/block -s 4294967296 # 4GB + ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1 + truncate $dir/1/block -s 4311744512 # 4GB + 16MB + ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1 + truncate $dir/2/block -s 4295099392 # 4GB + 129KB + ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1 + truncate $dir/3/block -s 4293918720 # 4GB - 1MB + ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1 + + # slow, DB, WAL -> slow, DB + ceph-bluestore-tool --path $dir/0 fsck || return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes + + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL -> slow, WAL + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow, DB, WAL -> slow + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block.wal \ + --devs-source $dir/2/block.db \ + --dev-target $dir/2/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB, WAL -> slow, WAL (negative case) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block.wal \ + --command bluefs-bdev-migrate + + # Migration to WAL is unsupported + if [ $? 
-eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + # slow, DB, WAL -> slow, DB (WAL to slow then slow to DB) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --dev-target $dir/3/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + wait_for_clean || return 1 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB -> slow, DB, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/wal count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --dev-target $dir/0/wal \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, WAL -> slow, DB, WAL + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --dev-target $dir/1/db \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block \ + --dev-target $dir/1/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB, WAL + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB -> slow, WAL + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB1, WAL -> slow, DB2, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/db2 count=1024 bs=1M + ceph-bluestore-tool 
--path $dir/0 \ + --devs-source $dir/0/block.db \ + --dev-target $dir/0/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL1 -> slow, DB, WAL2 + + dd if=/dev/zero of=$dir/0/wal2 count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/wal2 \ + --command bluefs-bdev-migrate || return 1 + rm -rf $dir/0/wal + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB + WAL -> slow, DB2 -> slow + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --devs-source $dir/1/block.wal \ + --dev-target $dir/1/db2 \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db2 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB (negative case) + ceph-objectstore-tool --type bluestore --data-path $dir/2 \ + --op fsck --no-mon-config || return 1 + + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate + + # Migration from slow-only to new device is unsupported + if [ $? -eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + DB + WAL -> slow, DB2 + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --devs-source $dir/2/block.db \ + --devs-source $dir/2/block.wal \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + WAL -> slow2, WAL2 + dd if=/dev/zero of=$dir/3/wal2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/wal2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + wait_for_clean || return 1 +} + +function TEST_bluestore2() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=4294967296 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_create=false " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + CEPH_ARGS+="--osd_pool_default_size=1 " + CEPH_ARGS+="--osd_pool_default_min_size=1 " + CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + sleep 5 + create_pool foo 16 + + retry = 0 + while [[ $retry -le 5 ]]; do + # write some objects + timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1 + + #give RocksDB some time to cooldown and put files to slow level(s) + sleep 10 + + db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" ) + spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" ) + ((retry+=1)) + test $spilled_over -eq 0 || break + done + test $spilled_over -gt 0 || return 1 + + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.db \ + --dev-target $dir/0/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 \ + --command bluefs-bdev-sizes || return 1 + + ceph-bluestore-tool --path $dir/0 \ + --command fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + wait_for_clean || return 1 +} + +function TEST_bluestore_expand() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=4294967296 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_create=false " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + CEPH_ARGS+="--osd_pool_default_size=1 " + CEPH_ARGS+="--osd_pool_default_min_size=1 " + CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + sleep 5 + create_pool foo 16 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + sleep 5 + + total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" ) + free_space_before=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2` + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + # destage allocation to file before expand (in case fast-shutdown skipped that step) + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 allocmap || return 1 + + # expand slow devices + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1 + + requested_space=4294967296 # 4GB + truncate $dir/0/block -s $requested_space + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-expand || return 1 + + # slow, DB, WAL -> slow, DB + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1 + + # compare allocation-file with RocksDB state + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1 + + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-sizes + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + wait_for_clean || return 1 + + total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" ) + free_space_after=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2` + + if [$total_space_after != $requested_space]; then + echo "total_space_after = $total_space_after" + echo "requested_space = $requested_space" + return 1; + fi + + total_space_added=$((total_space_after - total_space_before)) + free_space_added=$((free_space_after - free_space_before)) + + let new_used_space=($total_space_added - $free_space_added) + echo $new_used_space + # allow upto 128KB to be consumed + if [ $new_used_space -gt 131072 ]; then + echo "total_space_added = $total_space_added" + echo "free_space_added = $free_space_added" + return 1; + fi + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1 +} + +main osd-bluefs-volume-ops "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/osd/osd-bluefs-volume-ops.sh" +# End: diff --git a/qa/standalone/osd/osd-config.sh b/qa/standalone/osd/osd-config.sh new file mode 100755 index 000000000..126c2f7de --- /dev/null +++ b/qa/standalone/osd/osd-config.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_config_init() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local stale=1000 + local cache=500 + run_osd $dir 0 \ + --osd-map-cache-size=$cache \ + --osd-pg-epoch-persisted-max-stale=$stale \ + || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 +} + +function TEST_config_track() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_map_cache_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_map_cache_size) + local osd_pg_epoch_persisted_max_stale=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_pg_epoch_persisted_max_stale) + + # + # increase the osd_pg_epoch_persisted_max_stale above the default cache_size + # + ! 
grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + local stale=$(($osd_map_cache_size * 2)) + ceph tell osd.0 injectargs "--osd-pg-epoch-persisted-max-stale $stale" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + rm $dir/osd.0.log + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log reopen || return 1 +} + +function TEST_default_adjustment() { + a=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin) + b=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin default) + c=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin arg) + [ "$a" != "default" ] || return 1 + [ "$b" = "default" ] || return 1 + [ "$c" = "arg" ] || return 1 + + a=$(ceph-osd --no-mon-config --show-config-value log_to_file) + b=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false) + c=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false --log-to-file) + [ "$a" = "true" ] || return 1 + [ "$b" = "false" ] || return 1 + [ "$c" = "true" ] || return 1 +} + +main osd-config "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-config.sh" +# End: diff --git a/qa/standalone/osd/osd-copy-from.sh b/qa/standalone/osd/osd-copy-from.sh new file mode 100755 index 000000000..8ac0ab541 --- /dev/null +++ b/qa/standalone/osd/osd-copy-from.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Sage Weil <sage@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7111" # git grep '\<7111\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_copy_from() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + + # success + rados -p rbd put foo $(which rados) + rados -p rbd cp foo foo2 + rados -p rbd stat foo2 + + # failure + ceph tell osd.\* injectargs -- --osd-debug-inject-copyfrom-error + ! rados -p rbd cp foo foo3 + ! rados -p rbd stat foo3 + + # success again + ceph tell osd.\* injectargs -- --no-osd-debug-inject-copyfrom-error + ! rados -p rbd cp foo foo3 + rados -p rbd stat foo3 +} + +main osd-copy-from "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/osd/osd-dup.sh b/qa/standalone/osd/osd-dup.sh new file mode 100755 index 000000000..ab442c538 --- /dev/null +++ b/qa/standalone/osd/osd-dup.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +[ `uname` = FreeBSD ] && exit 0 + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # avoid running out of fds in rados bench + CEPH_ARGS+="--filestore_wbthrottle_xfs_ios_hard_limit=900 " + CEPH_ARGS+="--filestore_wbthrottle_btrfs_ios_hard_limit=900 " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +main osd-dup "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh" +# End: diff --git a/qa/standalone/osd/osd-fast-mark-down.sh b/qa/standalone/osd/osd-fast-mark-down.sh new file mode 100755 index 000000000..0ef9d8ce4 --- /dev/null +++ b/qa/standalone/osd/osd-fast-mark-down.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Piotr Dałek <git@predictor.org.pl> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +MAX_PROPAGATION_TIME=30 + +function run() { + local dir=$1 + shift + rm -f $dir/*.pid + export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + OLD_ARGS=$CEPH_ARGS + CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false " + echo "Ensuring old behavior is there..." + test_fast_kill $dir && (echo "OSDs died too early! Old behavior doesn't work." ; return 1) + + CEPH_ARGS=$OLD_ARGS"--osd-fast-fail-on-connection-refused=true " + OLD_ARGS=$CEPH_ARGS + + CEPH_ARGS=$OLD_ARGS"--ms_type=async --mon-host=$CEPH_MON" + echo "Testing async msgr..." + test_fast_kill $dir || return 1 + + return 0 + +} + +function test_fast_kill() { + # create cluster with 3 osds + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for oi in {0..2}; do + run_osd $dir $oi || return 1 + pids[$oi]=$(cat $dir/osd.$oi.pid) + done + + create_rbd_pool || return 1 + + # make some objects so osds to ensure connectivity between osds + timeout 20 rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1 + sleep 1 + + killid=0 + previd=0 + + # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased. 
+ for i in {1..2}; do + while [ $killid -eq $previd ]; do + killid=${pids[$RANDOM%${#pids[@]}]} + done + previd=$killid + + kill -9 $killid + time_left=$MAX_PROPAGATION_TIME + down_osds=0 + + while [ $time_left -gt 0 ]; do + sleep 1 + time_left=$[$time_left - 1]; + + grep -m 1 -c -F "ms_handle_refused" $dir/osd.*.log > /dev/null + if [ $? -ne 0 ]; then + continue + fi + + down_osds=$(ceph osd tree | grep -c down) + if [ $down_osds -lt $i ]; then + # osds not marked down yet, try again in a second + continue + elif [ $down_osds -gt $i ]; then + echo Too many \($down_osds\) osds died! + return 1 + else + break + fi + done + + if [ $down_osds -lt $i ]; then + echo Killed the OSD, yet it is not marked down + ceph osd tree + return 1 + fi + done + pkill -SIGTERM rados + teardown $dir || return 1 +} + +main osd-fast-mark-down "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh" +# End: diff --git a/qa/standalone/osd/osd-force-create-pg.sh b/qa/standalone/osd/osd-force-create-pg.sh new file mode 100755 index 000000000..ca4b0239e --- /dev/null +++ b/qa/standalone/osd/osd-force-create-pg.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7145" # git grep '\<7145\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reuse_id() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 50 || return 1 + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.0 + kill_daemons $dir TERM osd.1 + kill_daemons $dir TERM osd.2 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/1 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.0 --force + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + sleep 10 + ceph pg ls | grep 1.0 | grep stale || return 1 + + ceph osd force-create-pg 1.0 --yes-i-really-mean-it || return 1 + wait_for_clean || return 1 +} + +main osd-force-create-pg "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-force-create-pg.sh" +# End: diff --git a/qa/standalone/osd/osd-markdown.sh b/qa/standalone/osd/osd-markdown.sh new file mode 100755 index 000000000..5c4a78440 --- /dev/null +++ b/qa/standalone/osd/osd-markdown.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Intel <contact@intel.com.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Xiaoxi Chen <xiaoxi.chen@intel.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7108" # git grep '\<7108\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function markdown_N_impl() { + markdown_times=$1 + total_time=$2 + sleeptime=$3 + for i in `seq 1 $markdown_times` + do + # check the OSD is UP + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree + ceph osd tree | grep osd.0 |grep up || return 1 + # mark the OSD down. + # override any dup setting in the environment to ensure we do this + # exactly once (modulo messenger failures, at least; we can't *actually* + # provide exactly-once semantics for mon commands). + ( unset CEPH_CLI_TEST_DUP_COMMAND ; ceph osd down 0 ) + sleep $sleeptime + done +} + + +function TEST_markdown_exceed_maxdown_count() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times within 300s, osd should stay dead on the 4th time + local count=3 + local sleeptime=10 + local period=300 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + # down N+1 times ,the osd.0 should die + ceph osd tree | grep down | grep osd.0 || return 1 +} + +function TEST_markdown_boot() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3 times within 120s, should stay up + local count=3 + local sleeptime=10 + local period=120 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $count $period $sleeptime + #down N times, osd.0 should be up + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_markdown_boot_exceed_time() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times, but over 40s, > 20s, so should stay up + local count=3 + local period=20 + local sleeptime=10 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_osd_stop() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + osd_0_pid=$(cat $dir/osd.0.pid) + ps -p $osd_0_pid || return 1 + + ceph osd tree | grep osd.0 | grep up || return 1 + 
ceph osd stop osd.0 + sleep 15 # give osd plenty of time to notice and exit + ceph osd tree | grep down | grep osd.0 || return 1 + ! ps -p $osd_0_pid || return 1 +} + +main osd-markdown "$@" diff --git a/qa/standalone/osd/osd-reactivate.sh b/qa/standalone/osd/osd-reactivate.sh new file mode 100755 index 000000000..6d6438629 --- /dev/null +++ b/qa/standalone/osd/osd-reactivate.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# +# Author: Vicente Cheng <freeze.bilsted@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7122" # git grep '\<7122\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reactivate() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + kill_daemons $dir TERM osd || return 1 + + ready_path=$dir"/0/ready" + activate_path=$dir"/0/active" + # trigger mkfs again + rm -rf $ready_path $activate_path + activate_osd $dir 0 || return 1 + +} + +main osd-reactivate "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reactivate.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-prio.sh b/qa/standalone/osd/osd-recovery-prio.sh new file mode 100755 index 000000000..02b65f67a --- /dev/null +++ b/qa/standalone/osd/osd-recovery-prio.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + # Set osd op queue = wpq for the tests. Recovery priority is not + # considered by mclock_scheduler leading to unexpected results. 
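+    # The checks below assert on the exact prio values reported by
+    # dump_recovery_reservations (FORCE_PRIO vs NORMAL_PRIO), which only
+    # hold when the op queue honours recovery priorities.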
+ CEPH_ARGS+="--osd-op-queue=wpq " + export objects=200 + export poolprefix=test + export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED + export NORMAL_PRIO="190" # See OSD_RECOVERY_PRIORITY_BASE + 10 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_recovery_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + local max_tries=10 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "pool3" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool3 size 1 --yes-i-really-mean-it + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set norecover + ceph osd set noout + + # Get a pg to want to recover and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + for i in $(seq 1 $max_tries) + do + if ! 
ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The first force-recovery PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1. Item is queued, re-queue with new priority + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress, if higher priority items waiting prempt item + #ceph osd unset norecover + ceph pg cancel-force-recovery $PG3 || return 1 + sleep 2 + #ceph osd set norecover + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-recovery $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-recovery $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-force-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-recovery PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset norecover + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to recovery priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start recovery by changing the pool sizes from 1 to 2 +# Use dump_recovery_reservations to verify priorities +function TEST_recovery_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. 
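+ # The first pool found below fills PG1 / chk_osd1_1 / chk_osd1_2; the next
+ # pool whose primary differs fills PG2 / chk_osd2_1 / chk_osd2_2.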
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio) + pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + + # Wait for both PGs to be in recovering state + ceph pg dump pgs + + # Wait for recovery to start + set -o pipefail + count=0 + while(true) + do + if test $(ceph --format json pg dump pgs | + jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) == "2" + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Recovery never started on both PGs" + return 1 + fi + count=$(expr $count + 1) + done + set +o pipefail + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) 
+ fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG for $pool2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-recovery-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-space.sh b/qa/standalone/osd/osd-recovery-space.sh new file mode 100755 index 000000000..3bafc5138 --- /dev/null +++ b/qa/standalone/osd/osd-recovery-space.sh @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_backfills=10 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_state() { + local state=$1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state -gt 0 && break + if (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_recovery_toofull() { + local timeout=$1 + wait_for_state recovery_toofull $timeout +} + + +# Create 1 pools with size 1 +# set ful-ratio to 50% +# Write data 600 5K (3000K) +# Inject fake_statfs_for_testing to 3600K (83% full) +# Incresase the pool size to 2 +# The pool shouldn't have room to recovery +function TEST_recovery_test_simple() { + local dir=$1 + local pools=1 + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-nearfull-ratio .40 + ceph osd set-backfillfull-ratio .45 + ceph osd set-full-ratio .50 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=5 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + + for o in $(seq 0 $(expr $OSDS - 1)) + do + ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1 + done + sleep 5 + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + + # If this times out, we'll detected errors below + wait_for_recovery_toofull 30 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in recovery_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_ERR" ]; then + echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then + echo "PG_RECOVERY_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + + 
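+# Rough arithmetic behind TEST_recovery_test_simple above (informational
+# sketch only, not executed by the test):
+#
+#   data_kib=$((600 * 5))             # 3000 KiB written to the size-1 pool
+#   statfs_kib=$((3686400 / 1024))    # 3600 KiB advertised by fake_statfs
+#   echo "$((data_kib * 100 / statfs_kib))% full"   # ~83%, past the 50% full ratio
+#
+# so once the pool size is raised to 2, recovery of the second replica has
+# no room and the PG must end up recovery_toofull.
+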
+main osd-recovery-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-stats.sh b/qa/standalone/osd/osd-recovery-stats.sh new file mode 100755 index 000000000..ad6f810d7 --- /dev/null +++ b/qa/standalone/osd/osd-recovery-stats.sh @@ -0,0 +1,512 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function below_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 )) +} + +function above_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check >= $target && $check <= $target + $margin ? 0 : 1 )) +} + +FIND_UPACT='grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"' +FIND_FIRST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"' +FIND_LAST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"' + +function check() { + local dir=$1 + local PG=$2 + local primary=$3 + local type=$4 + local degraded_start=$5 + local degraded_end=$6 + local misplaced_start=$7 + local misplaced_end=$8 + local primary_start=${9:-} + local primary_end=${10:-} + + local log=$dir/osd.${primary}.log + + local addp=" " + if [ "$type" = "erasure" ]; + then + addp="p" + fi + + UPACT=$(eval $FIND_UPACT) + + # Check 3rd line at start because of false recovery starts + local which="degraded" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $degraded_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $degraded_end || return 1 + + # Check 3rd line at start because of false recovery starts + which="misplaced" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $misplaced_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $misplaced_end || return 1 + + # This is the value of set into MISSING_ON_PRIMARY + if [ -n "$primary_start" ]; + then + which="shard $primary" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $primary_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $primary_end || return 1 + fi +} + +# [1,0,?] 
-> [1,2,4] +# degraded 500 -> 0 +# active+recovering+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467 +function do_recovery_out1() { + local dir=$1 + shift + local type=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + if [ $type = "erasure" ]; + then + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 $type myprofile + else + create_pool $poolname 1 1 $type + fi + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Only 2 OSDs so only 1 not primary + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + check $dir $PG $primary $type $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_recovery_replicated_out1() { + local dir=$1 + + do_recovery_out1 $dir replicated || return 1 +} + +function TEST_recovery_erasure_out1() { + local dir=$1 + + do_recovery_out1 $dir erasure || return 1 +} + +# [0, 1] -> [2,3,4,5] +# degraded 1000 -> 0 +# misplaced 1000 -> 0 +# missing on primary 500 -> 0 + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748 +function TEST_recovery_sizeup() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Only 2 OSDs so only 1 not primary + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + ceph osd out osd.$primary osd.$otherosd + ceph osd pool set test size 4 + ceph osd unset norecover + # Get new primary + primary=$(get_primary $poolname obj1) + + ceph tell osd.${primary} debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local degraded=$(expr $objects \* 2) + local misplaced=$(expr $objects \* 2) + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated $degraded 0 $misplaced 0 $objects 0 || return 1 + + delete_pool 
$poolname + kill_daemons $dir || return 1 +} + +# [0, 1, 2, 4] -> [3, 5] +# misplaced 1000 -> 0 +# missing on primary 500 -> 0 +# active+recovering+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248 +function TEST_recovery_sizedown() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 4 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Only 2 OSDs so only 1 not primary + local allosds=$(get_osds $poolname obj1) + + ceph osd set norecover + for osd in $allosds + do + ceph osd out osd.$osd + done + + ceph osd pool set test size 2 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + # Get new primary + primary=$(get_primary $poolname obj1) + + local misplaced=$(expr $objects \* 2) + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated 0 0 $misplaced 0 || return 1 + + UPACT=$(grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/") + + # This is the value of set into MISSING_ON_PRIMARY + FIRST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/") + below_margin $FIRST $objects || return 1 + LAST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/") + above_margin $LAST 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1] -> [1,2] +# degraded 300 -> 200 +# active+recovering+undersized+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563 +function TEST_recovery_undersized() { + local dir=$1 + + local osds=3 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $osds - 1)) + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + + ceph osd set norecover + # Mark any osd not the primary (only 1 replica so also has no replica) + for i in $(seq 0 $(expr $osds - 1)) + do + if [ $i = $primary ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd pool set test size 4 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 
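+ # kick the recovery work queue (zero delay) so the primary starts
+ # recovering immediately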
+ # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean() + sleep 10 + flush_pg_stats || return 1 + + # Wait for recovery to finish + # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded + # to active+undersized+degraded + for i in $(seq 1 300) + do + if ceph pg dump pgs | grep ^$PG | grep -qv recovering + then + break + fi + if [ $i = "300" ]; + then + echo "Timeout waiting for recovery to finish" + return 1 + fi + sleep 1 + done + + # Get new primary + primary=$(get_primary $poolname obj1) + local log=$dir/osd.${primary}.log + + local first_degraded=$(expr $objects \* 3) + local last_degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $first_degraded $last_degraded 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0,2] -> [1,3,NONE]/[1,3,2] +# degraded 100 -> 0 +# misplaced 100 -> 100 +# active+recovering+degraded+remapped + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242 +function TEST_recovery_erasure_remapped() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + + # Mark osd not the primary and not down/out osd as just out + for i in 0 1 2 3 + do + if [ $i = $primary ]; + then + continue + fi + if [ $i = $otherosd ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local log=$dir/osd.${primary}.log + check $dir $PG $primary erasure $objects 0 $objects $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_recovery_multi() { + local dir=$1 + + local osds=6 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $osds - 1)) + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 1 + + wait_for_clean || return 1 + + rados -p $poolname put obj1 /dev/null + + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + + local half=$(expr $objects / 2) + for i in $(seq 2 $half) + do + rados -p $poolname put obj$i /dev/null + done + + kill $(cat $dir/osd.${primary}.pid) + ceph osd down osd.${primary} + activate_osd $dir ${otherosd} + 
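# give the revived osd.${otherosd} a moment to boot before writing the second half of the objects +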
sleep 3 + + for i in $(seq $(expr $half + 1) $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + local otherosd=$(get_not_primary $poolname obj$objects) + + ceph osd unset noout + ceph osd out osd.$primary osd.$otherosd + activate_osd $dir ${primary} + sleep 3 + + ceph osd pool set test size 4 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + # Get new primary + primary=$(get_primary $poolname obj1) + + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated 399 0 300 0 99 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main osd-recovery-stats "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh" +# End: diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh new file mode 100755 index 000000000..6fea441b3 --- /dev/null +++ b/qa/standalone/osd/osd-rep-recov-eio.sh @@ -0,0 +1,422 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# +# Author: Kefu Chai <kchai@redhat.com> +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +warnings=10 + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7140" # git grep '\<7140\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + # set warning amount in case default changes + run_mon $dir a --mon_osd_warn_num_repaired=$warnings || return 1 + run_mgr $dir x || return 1 + ceph osd pool create foo 8 || return 1 + + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function setup_osds() { + local count=$1 + shift + local type=$1 + + for id in $(seq 0 $(expr $count - 1)) ; do + run_osd${type} $dir $id || return 1 + done + wait_for_clean || return 1 +} + +function get_state() { + local pgid=$1 + local sname=state + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function rados_put() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 +} + +function rados_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + local expect=${4:-ok} + + # + # Expect a failure to get object + # + if [ $expect = "fail" ]; + then + ! rados --pool $poolname get $objname $dir/COPY + return + fi + # + # Expect hang trying to get object + # + if [ $expect = "hang" ]; + then + timeout 5 rados --pool $poolname get $objname $dir/COPY + test "$?" 
= "124" + return + fi + # + # get an object, compare with $dir/ORIGINAL + # + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY +} + +function rados_get_data() { + local inject=$1 + shift + local dir=$1 + + local poolname=pool-rep + local objname=obj-$inject-$$ + local pgid=$(get_pg $poolname $objname) + + rados_put $dir $poolname $objname || return 1 + inject_$inject rep data $poolname $objname $dir 0 || return 1 + rados_get $dir $poolname $objname || return 1 + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "1" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "1" || return 1 + + local object_osds=($(get_osds $poolname $objname)) + local primary=${object_osds[0]} + local bad_peer=${object_osds[1]} + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 1 || return 1 + # Force primary to pull from the bad peer, so we can repair it too! + set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1 + rados_get $dir $poolname $objname || return 1 + + # Wait until automatic repair of bad peer is done + wait_for_clean || return 1 + + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 2 || return 1 + rados_get $dir $poolname $objname || return 1 + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "3" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "4" || return 1 + + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 1 || return 1 + inject_$inject rep data $poolname $objname $dir 2 || return 1 + rados_get $dir $poolname $objname hang || return 1 + + wait_for_clean + # After hang another repair couldn't happen, so count stays the same + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "3" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "4" || return 1 +} + +function TEST_rados_get_with_eio() { + local dir=$1 + + setup_osds 4 || return 1 + + local poolname=pool-rep + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + rados_get_data eio $dir || return 1 + + delete_pool $poolname +} + +function TEST_rados_repair_warning() { + local dir=$1 + local OBJS=$(expr $warnings + 1) + + setup_osds 4 || return 1 + + local poolname=pool-rep + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + local poolname=pool-rep + local objbase=obj-warn + local inject=eio + + for i in $(seq 1 $OBJS) + do + rados_put $dir $poolname ${objbase}-$i || return 1 + inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1 + rados_get $dir $poolname ${objbase}-$i || return 1 + done + local pgid=$(get_pg $poolname ${objbase}-1) + + local object_osds=($(get_osds $poolname ${objbase}-1)) + local primary=${object_osds[0]} + local bad_peer=${object_osds[1]} + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$OBJS" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq 
".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$OBJS" || return 1 + + ceph health | grep -q "Too many repaired reads on 1 OSDs" || return 1 + ceph health detail | grep -q "osd.$primary had $OBJS reads repaired" || return 1 + + ceph health mute OSD_TOO_MANY_REPAIRS + set -o pipefail + # Should mute this + ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1 + set +o pipefail + + for i in $(seq 1 $OBJS) + do + inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1 + inject_$inject rep data $poolname ${objbase}-$i $dir 1 || return 1 + # Force primary to pull from the bad peer, so we can repair it too! + set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1 + rados_get $dir $poolname ${objbase}-$i || return 1 + done + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$(expr $OBJS \* 2)" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$(expr $OBJS \* 3)" || return 1 + + # Give mon a chance to notice additional OSD and unmute + # The default tick time is 5 seconds + CHECKTIME=10 + LOOPS=0 + while(true) + do + sleep 1 + if ceph health | grep -q "Too many repaired reads on 2 OSDs" + then + break + fi + LOOPS=$(expr $LOOPS + 1) + if test "$LOOPS" = "$CHECKTIME" + then + echo "Too many repaired reads not seen after $CHECKTIME seconds" + return 1 + fi + done + ceph health detail | grep -q "osd.$primary had $(expr $OBJS \* 2) reads repaired" || return 1 + ceph health detail | grep -q "osd.$bad_peer had $OBJS reads repaired" || return 1 + + delete_pool $poolname +} + +# Test backfill with unfound object +function TEST_rep_backfill_unfound() { + local dir=$1 + local objname=myobject + local lastobj=300 + # Must be between 1 and $lastobj + local testobj=obj250 + + export CEPH_ARGS + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 3 || return 1 + + local poolname=test-pool + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio rep data $poolname $testobj $dir 0 || return 1 + inject_eio rep data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 360); do + state=$(get_state 2.0) + echo $state | grep backfill_unfound + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! 
rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + +# Test recovery with unfound object +function TEST_rep_recovery_unfound() { + local dir=$1 + local objname=myobject + local lastobj=100 + # Must be between 1 and $lastobj + local testobj=obj75 + + setup_osds 3 || return 1 + + local poolname=test-pool + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio rep data $poolname $testobj $dir 0 || return 1 + inject_eio rep data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 100); do + state=$(get_state 2.0) + echo $state | grep -v recovering + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + +main osd-rep-recov-eio.sh "$@" + +# Local Variables: +# compile-command: "cd ../../../build ; make -j4 && ../qa/run-standalone.sh osd-rep-recov-eio.sh" +# End: diff --git a/qa/standalone/osd/osd-reuse-id.sh b/qa/standalone/osd/osd-reuse-id.sh new file mode 100755 index 000000000..b24b6f2eb --- /dev/null +++ b/qa/standalone/osd/osd-reuse-id.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7123" # git grep '\<7123\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reuse_id() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + destroy_osd $dir 1 || return 1 + run_osd $dir 1 || return 1 +} + +main osd-reuse-id "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reuse-id.sh" +# End: diff --git a/qa/standalone/osd/pg-split-merge.sh b/qa/standalone/osd/pg-split-merge.sh new file mode 100755 index 000000000..7f2899b60 --- /dev/null +++ b/qa/standalone/osd/pg-split-merge.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10" + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_a_merge_empty() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 2 || return 1 + ceph osd pool set foo pgp_num 1 || return 1 + + wait_for_clean || return 1 + + # note: we need 1.0 to have the same or more objects than 1.1 + # 1.1 + rados -p foo put foo1 /etc/passwd + rados -p foo put foo2 /etc/passwd + rados -p foo put foo3 /etc/passwd + rados -p foo put foo4 /etc/passwd + # 1.0 + rados -p foo put foo5 /etc/passwd + rados -p foo put foo6 /etc/passwd + rados -p foo put foo8 /etc/passwd + rados -p foo put foo10 /etc/passwd + rados -p foo put foo11 /etc/passwd + rados -p foo put foo12 /etc/passwd + rados -p foo put foo16 /etc/passwd + + wait_for_clean || return 1 + + ceph tell osd.1 config set osd_debug_no_purge_strays true + ceph osd pool set foo size 2 || return 1 + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.2 || return 1 + ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.1 --force || return 1 + activate_osd $dir 2 || return 1 + + wait_for_clean || return 1 + + # osd.2: now 1.0 is there but 1.1 is not + + # instantiate 1.1 on osd.2 with last_update=0'0 ('empty'), which is + # the problematic state... then let it merge with 1.0 + ceph tell osd.2 config set osd_debug_no_acting_change true + ceph osd out 0 1 + ceph osd pool set foo pg_num 1 + sleep 5 + ceph tell osd.2 config set osd_debug_no_acting_change false + + # go back to osd.1 being primary, and 3x so the osd.2 copy doesn't get + # removed + ceph osd in 0 1 + ceph osd pool set foo size 3 + + wait_for_clean || return 1 + + # scrub to ensure the osd.3 copy of 1.0 was incomplete (vs missing + # half of its objects). 
+ ceph pg scrub 1.0 + sleep 10 + ceph log last debug + ceph pg ls + ceph pg ls | grep ' active.clean ' || return 1 +} + +function TEST_import_after_merge_and_gap() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd pool create foo 2 || return 1 + wait_for_clean || return 1 + rados -p foo bench 3 write -b 1024 --no-cleanup || return 1 + + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.1 --file $dir/1.1 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1 + activate_osd $dir 0 || return 1 + + ceph osd pool set foo pg_num 1 + sleep 5 + while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done + wait_for_clean || return 1 + + # + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1 + # this will import both halves the original pg + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + activate_osd $dir 0 || return 1 + + wait_for_clean || return 1 + + # make a map gap + for f in `seq 1 50` ; do + ceph osd set nodown + ceph osd unset nodown + done + + # poke and prod to ensure last_epech_clean is big, reported to mon, and + # the osd is able to trim old maps + rados -p foo bench 1 write -b 1024 --no-cleanup || return 1 + wait_for_clean || return 1 + ceph tell osd.0 send_beacon + sleep 5 + ceph osd set nodown + ceph osd unset nodown + sleep 5 + + kill_daemons $dir TERM osd.0 || return 1 + + # this should fail.. 1.1 still doesn't exist + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op export-remove --pgid 1.0 --force --file $dir/1.0.later || return 1 + + # this should fail too because of the gap + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + + # we can force it... + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 --force || return 1 + + # ...but the osd won't start, so remove it again. 
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0.later --force || return 1 + + + activate_osd $dir 0 || return 1 + + wait_for_clean || return 1 +} + +function TEST_import_after_split() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd pool create foo 1 || return 1 + wait_for_clean || return 1 + rados -p foo bench 3 write -b 1024 --no-cleanup || return 1 + + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1 + activate_osd $dir 0 || return 1 + + ceph osd pool set foo pg_num 2 + sleep 5 + while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.0 || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1 + + # this should fail because 1.1 (split child) is there + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1 + # now it will work (1.1. is gone) + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + + activate_osd $dir 0 || return 1 + + wait_for_clean || return 1 +} + + +main pg-split-merge "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh" +# End: diff --git a/qa/standalone/osd/repeer-on-acting-back.sh b/qa/standalone/osd/repeer-on-acting-back.sh new file mode 100755 index 000000000..af406ef92 --- /dev/null +++ b/qa/standalone/osd/repeer-on-acting-back.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn> +# +# Author: xie xingguo <xie.xingguo@zte.com.cn> +# Author: Yan Jun <yan.jun8@zte.com.cn> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export poolname=test + export testobjects=100 + export loglen=12 + export trim=$(expr $loglen / 2) + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + # use small pg_log settings, so we always do backfill instead of recovery + CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_repeer_on_down_acting_member_coming_back() { + local dir=$1 + local dummyfile='/etc/fstab' + + local num_osds=6 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid') + local pgid=$poolid.0 + + # enable required feature-bits for upmap + ceph osd set-require-min-compat-client luminous + # reset up to [1,2,3] + ceph osd pg-upmap $pgid 1 2 3 || return 1 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # reset up to [1,4,5] + ceph osd pg-upmap $pgid 1 4 5 || return 1 + + # wait for peering to complete + sleep 2 + + # make sure osd.2 belongs to current acting set + ceph pg $pgid query | jq '.acting' | grep 2 || return 1 + + # kill osd.2 + kill_daemons $dir KILL osd.2 || return 1 + ceph osd down osd.2 + + # again, wait for peering to complete + sleep 2 + + # osd.2 should have been moved out from acting set + ceph pg $pgid query | jq '.acting' | grep 2 && return 1 + + # bring up osd.2 + activate_osd $dir 2 || return 1 + wait_for_osd up 2 + + # again, wait for peering to complete + sleep 2 + + # primary should be able to re-add osd.2 into acting + ceph pg $pgid query | jq '.acting' | grep 2 || return 1 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + if ! 
grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main repeer-on-acting-back "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh" +# End: diff --git a/qa/standalone/osd/repro_long_log.sh b/qa/standalone/osd/repro_long_log.sh new file mode 100755 index 000000000..fa27d7017 --- /dev/null +++ b/qa/standalone/osd/repro_long_log.sh @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: Josh Durgin <jdurgin@redhat.com> +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +PGID= + +function test_log_size() +{ + local PGID=$1 + local EXPECTED=$2 + local DUPS_EXPECTED=${3:-0} + ceph tell osd.\* flush_pg_stats + sleep 3 + ceph pg $PGID query | jq .info.stats.log_size + ceph pg $PGID query | jq .info.stats.log_size | grep "${EXPECTED}" + ceph pg $PGID query | jq .info.stats.log_dups_size + ceph pg $PGID query | jq .info.stats.log_dups_size | grep "${DUPS_EXPECTED}" +} + +function setup_log_test() { + local dir=$1 + local which=$2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create test 1 1 || true + POOL_ID=$(ceph osd dump --format json | jq '.pools[] | select(.pool_name == "test") | .pool') + PGID="${POOL_ID}.0" + + # With 1 PG setting entries per osd 20 results in a target log of 20 + ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 20 || return 1 + ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 20 || return 1 + ceph tell osd.\* injectargs -- --osd-max-pg-log-entries 30 || return 1 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 10 || return 1 + ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 + + touch $dir/foo + for i in $(seq 1 20) + do + rados -p test put foo $dir/foo || return 1 + done + + test_log_size $PGID 20 || return 1 + + rados -p test rm foo || return 1 + + # generate error entries + for i in $(seq 1 20) + do + rados -p test rm foo + done + + # log should have been trimmed down to min_entries with one extra + test_log_size $PGID 21 || return 1 +} + +function TEST_repro_long_log1() +{ + local dir=$1 + + setup_log_test $dir || return 1 + # regular write should trim the log + rados -p test put foo $dir/foo || return 1 + 
test_log_size $PGID 22 || return 1 +} + +function TEST_repro_long_log2() +{ + local dir=$1 + + setup_log_test $dir || return 1 + local PRIMARY=$(ceph pg $PGID query | jq '.info.stats.up_primary') + kill_daemons $dir TERM osd.$PRIMARY || return 1 + CEPH_ARGS="--osd-max-pg-log-entries=2 --osd-pg-log-dups-tracked=3 --no-mon-config" ceph-objectstore-tool --data-path $dir/$PRIMARY --pgid $PGID --op trim-pg-log || return 1 + activate_osd $dir $PRIMARY || return 1 + wait_for_clean || return 1 + test_log_size $PGID 21 18 || return 1 +} + +function TEST_trim_max_entries() +{ + local dir=$1 + + setup_log_test $dir || return 1 + + ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 + ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 + ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 0 + + # adding log entries, should only trim 4 and add one each time + rados -p test rm foo + test_log_size $PGID 18 || return 1 + rados -p test rm foo + test_log_size $PGID 15 || return 1 + rados -p test rm foo + test_log_size $PGID 12 || return 1 + rados -p test rm foo + test_log_size $PGID 9 || return 1 + rados -p test rm foo + test_log_size $PGID 6 || return 1 + rados -p test rm foo + test_log_size $PGID 3 || return 1 + + # below trim_min + rados -p test rm foo + test_log_size $PGID 4 || return 1 + rados -p test rm foo + test_log_size $PGID 3 || return 1 + rados -p test rm foo + test_log_size $PGID 4 || return 1 + rados -p test rm foo + test_log_size $PGID 3 || return 1 +} + +function TEST_trim_max_entries_with_dups() +{ + local dir=$1 + + setup_log_test $dir || return 1 + + ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 + ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 + ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 + + # adding log entries, should only trim 4 and add one each time + # dups should be trimmed to 1 + rados -p test rm foo + test_log_size $PGID 18 2 || return 1 + rados -p test rm foo + test_log_size $PGID 15 6 || return 1 + rados -p test rm foo + test_log_size $PGID 12 10 || return 1 + rados -p test rm foo + test_log_size $PGID 9 14 || return 1 + rados -p test rm foo + test_log_size $PGID 6 18 || return 1 + rados -p test rm foo + test_log_size $PGID 3 20 || return 1 + + # below trim_min + rados -p test rm foo + test_log_size $PGID 4 20 || return 1 + rados -p test rm foo + test_log_size $PGID 3 20 || return 1 + rados -p test rm foo + test_log_size $PGID 4 20 || return 1 + rados -p test rm foo + test_log_size $PGID 3 20 || return 1 +} + +main repro-long-log "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && ../qa/run-standalone.sh repro_long_log.sh" +# End: diff --git a/qa/standalone/scrub/osd-mapper.sh b/qa/standalone/scrub/osd-mapper.sh new file mode 100755 index 000000000..ed18f94f1 --- /dev/null +++ b/qa/standalone/scrub/osd-mapper.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash +# -*- mode:text; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +# vim: ts=8 sw=2 smarttab +# +# test the handling of a corrupted SnapMapper DB by Scrub + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +# one clone & multiple snaps (according to the number of parameters) +function make_a_clone() +{ + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + local pool=$1 + local obj=$2 + echo $RANDOM | rados -p $pool put $obj - || return 1 + shift 2 + for snap in $@ ; do + rados -p $pool mksnap $snap || return 1 + done + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + +function TEST_truncated_sna_record() { + local dir=$1 + local -A cluster_conf=( + ['osds_num']="3" + ['pgs_in_pool']="4" + ['pool_name']="test" + ) + + local extr_dbg=3 + (( extr_dbg > 1 )) && echo "Dir: $dir" + standard_scrub_cluster $dir cluster_conf + ceph tell osd.* config set osd_stats_update_period_not_scrubbing "1" + ceph tell osd.* config set osd_stats_update_period_scrubbing "1" + + local osdn=${cluster_conf['osds_num']} + local poolid=${cluster_conf['pool_id']} + local poolname=${cluster_conf['pool_name']} + local objname="objxxx" + + # create an object and clone it + make_a_clone $poolname $objname snap01 snap02 || return 1 + make_a_clone $poolname $objname snap13 || return 1 + make_a_clone $poolname $objname snap24 snap25 || return 1 + echo $RANDOM | rados -p $poolname put $objname - || return 1 + + #identify the PG and the primary OSD + local pgid=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.pgid'` + local osd=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'` + echo "pgid is $pgid (primary: osd.$osd)" + # turn on the publishing of test data in the 'scrubber' section of 'pg query' output + set_query_debug $pgid + + # verify the existence of these clones + (( extr_dbg >= 1 )) && rados --format json-pretty -p $poolname listsnaps $objname + + # scrub the PG + ceph pg $pgid deep_scrub || return 1 + + # we aren't just waiting for the scrub to terminate, but also for the + # logs to be published + sleep 3 + ceph pg dump pgs + until grep -a -q -- "event: --^^^^---- ScrubFinished" $dir/osd.$osd.log ; do + sleep 0.2 + done + + ceph pg dump pgs + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + sleep 5 + grep -a -q -v "ERR" $dir/osd.$osd.log || return 1 + + # kill the OSDs + kill_daemons $dir TERM osd || return 1 + + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/0 dump "p" + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump "p" | grep -a SNA_ + (( extr_dbg >= 2 )) && grep -a SNA_ /tmp/oo2.dump + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump p 2> /dev/null + local 
num_sna_b4=`ceph-kvstore-tool bluestore-kv $dir/$osd dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_000000000000000[0-9]_000000000000000' \ + | awk -e '{print $2;}' | wc -l` + + for sdn in $(seq 0 $(expr $osdn - 1)) + do + kvdir=$dir/$sdn + echo "corrupting the SnapMapper DB of osd.$sdn (db: $kvdir)" + (( extr_dbg >= 3 )) && ceph-kvstore-tool bluestore-kv $kvdir dump "p" + + # truncate the 'mapping' (SNA_) entry corresponding to the snap13 clone + KY=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_0000000000000003_000000000000000' \ + | awk -e '{print $2;}'` + (( extr_dbg >= 1 )) && echo "SNA key: $KY" | cat -v + + tmp_fn1=`mktemp -p /tmp --suffix="_the_val"` + (( extr_dbg >= 1 )) && echo "Value dumped in: $tmp_fn1" + ceph-kvstore-tool bluestore-kv $kvdir get p "$KY" out $tmp_fn1 2> /dev/null + (( extr_dbg >= 2 )) && od -xc $tmp_fn1 + + NKY=${KY:0:-30} + ceph-kvstore-tool bluestore-kv $kvdir rm "p" "$KY" 2> /dev/null + ceph-kvstore-tool bluestore-kv $kvdir set "p" "$NKY" in $tmp_fn1 2> /dev/null + + (( extr_dbg >= 1 )) || rm $tmp_fn1 + done + + orig_osd_args=" ${cluster_conf['osd_args']}" + orig_osd_args=" $(echo $orig_osd_args)" + (( extr_dbg >= 2 )) && echo "Copied OSD args: /$orig_osd_args/ /${orig_osd_args:1}/" + for sdn in $(seq 0 $(expr $osdn - 1)) + do + CEPH_ARGS="$CEPH_ARGS $orig_osd_args" activate_osd $dir $sdn + done + sleep 1 + + for sdn in $(seq 0 $(expr $osdn - 1)) + do + timeout 60 ceph tell osd.$sdn version + done + rados --format json-pretty -p $poolname listsnaps $objname + + # when scrubbing now - we expect the scrub to emit a cluster log ERR message regarding SnapMapper internal inconsistency + ceph osd unset nodeep-scrub || return 1 + ceph osd unset noscrub || return 1 + + # what is the primary now? + local cur_prim=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'` + ceph pg dump pgs + sleep 2 + ceph pg $pgid deep_scrub || return 1 + sleep 5 + ceph pg dump pgs + (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log + grep -a -q "ERR" $dir/osd.$cur_prim.log || return 1 + + # but did we fix the snap issue? let's try scrubbing again + + local prev_err_cnt=`grep -a "ERR" $dir/osd.$cur_prim.log | wc -l` + echo "prev count: $prev_err_cnt" + + # scrub again. No errors expected this time + ceph pg $pgid deep_scrub || return 1 + sleep 5 + ceph pg dump pgs + (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log + local current_err_cnt=`grep -a "ERR" $dir/osd.$cur_prim.log | wc -l` + (( extr_dbg >= 1 )) && echo "current count: $current_err_cnt" + (( current_err_cnt == prev_err_cnt )) || return 1 + kill_daemons $dir TERM osd || return 1 + kvdir=$dir/$cur_prim + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_' \ + | awk -e '{print $2;}' + local num_sna_full=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_000000000000000[0-9]_000000000000000' \ + | awk -e '{print $2;}' | wc -l` + (( num_sna_full == num_sna_b4 )) || return 1 + return 0 +} + + +main osd-mapper "$@" diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh new file mode 100755 index 000000000..9541852c7 --- /dev/null +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -0,0 +1,352 @@ +#! 
/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +# Simple test for "not scheduling scrubs due to active recovery" +# OSD::sched_scrub() called on all OSDs during ticks +function TEST_recovery_scrub_1() { + local dir=$1 + local poolname=test + + TESTDATA="testdata.$$" + OSDS=4 + PGS=1 + OBJECTS=100 + ERRORS=0 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \ + --osd_scrub_interval_randomize_ratio=0.0 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_during_recovery=false || return 1 + done + + # Create a pool with $PGS pgs + create_pool $poolname $PGS $PGS + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + ceph pg dump pgs + + dd if=/dev/urandom of=$TESTDATA bs=1M count=50 + for i in $(seq 1 $OBJECTS) + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + ceph osd pool set $poolname size 4 + + # Wait for recovery to start + set -o pipefail + count=0 + while(true) + do + if ceph --format json pg dump pgs | + jq '.pg_stats | [.[] | .state | contains("recovering")]' | grep -q true + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Recovery never started" + return 1 + fi + count=$(expr $count + 1) + done + set +o pipefail + ceph pg dump pgs + + sleep 10 + # Work around for http://tracker.ceph.com/issues/38195 + kill_daemons $dir #|| return 1 + + declare -a err_strings + err_strings[0]="not scheduling scrubs due to active recovery" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + grep "not scheduling scrubs" $dir/osd.${osd}.log + done + for err_string in "${err_strings[@]}" + do + found=false + count=0 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if grep -q "$err_string" $dir/osd.${osd}.log + then + found=true + count=$(expr $count + 1) + fi + done + if [ "$found" = "false" ]; then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + [ $count -eq $OSDS ] || return 1 + done + + teardown $dir || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +## +# a modified version of wait_for_scrub(), which terminates if the Primary +# of the to-be-scrubbed PG changes +# +# Given the *last_scrub*, wait for scrub to happen on **pgid**. It +# will fail if scrub does not complete within $TIMEOUT seconds. 
The +# repair is complete whenever the **get_last_scrub_stamp** function +# reports a timestamp different from the one given in argument. +# +# @param pgid the id of the PG +# @param the primary OSD when started +# @param last_scrub timestamp of the last scrub for *pgid* +# @return 0 on success, 1 on error +# +function wait_for_scrub_mod() { + local pgid=$1 + local orig_primary=$2 + local last_scrub="$3" + local sname=${4:-last_scrub_stamp} + + for ((i=0; i < $TIMEOUT; i++)); do + sleep 0.2 + if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then + return 0 + fi + sleep 1 + # are we still the primary? + local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + if [ $orig_primary != $current_primary ]; then + echo $orig_primary no longer primary for $pgid + return 0 + fi + done + return 1 +} + +## +# A modified version of pg_scrub() +# +# Run scrub on **pgid** and wait until it completes. The pg_scrub +# function will fail if repair does not complete within $TIMEOUT +# seconds. The pg_scrub is complete whenever the +# **get_last_scrub_stamp** function reports a timestamp different from +# the one stored before starting the scrub, or whenever the Primary +# changes. +# +# @param pgid the id of the PG +# @return 0 on success, 1 on error +# +function pg_scrub_mod() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid) + # locate the primary + local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local recovery=false + ceph pg scrub $pgid + #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" + if ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" | grep -q recovering + then + recovery=true + fi + wait_for_scrub_mod $pgid $my_primary "$last_scrub" || return 1 + if test $recovery = "true" + then + return 2 + fi +} + +# Same as wait_background() except that it checks for exit code 2 and bumps recov_scrub_count +function wait_background_check() { + # We extract the PIDS from the variable name + pids=${!1} + + return_code=0 + for pid in $pids; do + wait $pid + retcode=$? 
+ if test $retcode -eq 2 + then + recov_scrub_count=$(expr $recov_scrub_count + 1) + elif test $retcode -ne 0 + then + # If one process failed then return 1 + return_code=1 + fi + done + + # We empty the variable reporting that all process ended + eval "$1=''" + + return $return_code +} + +# osd_scrub_during_recovery=true make sure scrub happens +function TEST_recovery_scrub_2() { + local dir=$1 + local poolname=test + + TESTDATA="testdata.$$" + OSDS=8 + PGS=32 + OBJECTS=40 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \ + --osd_scrub_interval_randomize_ratio=0.0 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_during_recovery=true --osd_recovery_sleep=10 || return 1 + done + + # Create a pool with $PGS pgs + create_pool $poolname $PGS $PGS + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1M count=50 + for i in $(seq 1 $OBJECTS) + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + ceph osd pool set $poolname size 3 + + ceph pg dump pgs + + # Wait for recovery to start + count=0 + while(true) + do + #ceph --format json pg dump pgs | jq '.pg_stats | [.[].state]' + if test $(ceph --format json pg dump pgs | + jq '.pg_stats | [.[].state]'| grep recovering | wc -l) -ge 2 + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Not enough recovery started simultaneously" + return 1 + fi + count=$(expr $count + 1) + done + ceph pg dump pgs + + pids="" + recov_scrub_count=0 + for pg in $(seq 0 $(expr $PGS - 1)) + do + run_in_background pids pg_scrub_mod $poolid.$(printf "%x" $pg) + done + wait_background_check pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + ERRORS=0 + if test $recov_scrub_count -eq 0 + then + echo "No scrubs occurred while PG recovering" + ERRORS=$(expr $ERRORS + 1) + fi + + pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') + pid=$(cat $pidfile) + if ! 
kill -0 $pid + then + echo "OSD crash occurred" + #tail -100 $dir/osd.0.log + ERRORS=$(expr $ERRORS + 1) + fi + + # Work around for http://tracker.ceph.com/issues/38195 + kill_daemons $dir #|| return 1 + + declare -a err_strings + err_strings[0]="not scheduling scrubs due to active recovery" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + grep "not scheduling scrubs" $dir/osd.${osd}.log + done + for err_string in "${err_strings[@]}" + do + found=false + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if grep "$err_string" $dir/osd.${osd}.log > /dev/null; + then + found=true + fi + done + if [ "$found" = "true" ]; then + echo "Found log message not expected '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + teardown $dir || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +main osd-recovery-scrub "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-recovery-scrub.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-dump.sh b/qa/standalone/scrub/osd-scrub-dump.sh new file mode 100755 index 000000000..f21ec7801 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-dump.sh @@ -0,0 +1,180 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +MAX_SCRUBS=4 +SCRUB_SLEEP=3 +POOL_SIZE=3 + +function run() { + local dir=$1 + shift + local CHUNK_MAX=5 + + export CEPH_MON="127.0.0.1:7184" # git grep '\<7184\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_scrubs=$MAX_SCRUBS " + CEPH_ARGS+="--osd_shallow_scrub_chunk_max=$CHUNK_MAX " + CEPH_ARGS+="--osd_scrub_sleep=$SCRUB_SLEEP " + CEPH_ARGS+="--osd_pool_default_size=$POOL_SIZE " + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. 
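+    # (The small shallow-scrub chunk size and the non-zero scrub sleep set
+    # above likewise stretch each scrub out, so the reservation counts sampled
+    # by TEST_recover_unexpected below remain observable for several loop
+    # iterations.)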
+ CEPH_ARGS+="--osd_op_queue=wpq " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_recover_unexpected() { + local dir=$1 + shift + local OSDS=6 + local PGS=16 + local POOLS=3 + local OBJS=1000 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for o in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $o + done + + for i in $(seq 1 $POOLS) + do + create_pool test$i $PGS $PGS + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=datafile bs=4k count=2 + for i in $(seq 1 $POOLS) + do + for j in $(seq 1 $OBJS) + do + rados -p test$i put obj$j datafile + done + done + rm datafile + + ceph osd set noscrub + ceph osd set nodeep-scrub + + for qpg in $(ceph pg dump pgs --format=json-pretty | jq '.pg_stats[].pgid') + do + primary=$(ceph pg dump pgs --format=json | jq ".pg_stats[] | select(.pgid == $qpg) | .acting_primary") + eval pg=$qpg # strip quotes around qpg + ceph tell $pg scrub + done + + ceph pg dump pgs + + max=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_scrub_reservations | jq '.osd_max_scrubs') + if [ $max != $MAX_SCRUBS ]; then + echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations" + return 1 + fi + + ceph osd unset noscrub + + ok=false + for i in $(seq 0 300) + do + ceph pg dump pgs + if ceph pg dump pgs | grep '+scrubbing'; then + ok=true + break + fi + sleep 1 + done + if test $ok = "false"; then + echo "ERROR: Test set-up failed no scrubbing" + return 1 + fi + + local total=0 + local zerocount=0 + local maxzerocount=3 + while(true) + do + pass=0 + for o in $(seq 0 $(expr $OSDS - 1)) + do + CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations + scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote') + if [ $scrubs -gt $MAX_SCRUBS ]; then + echo "ERROR: More than $MAX_SCRUBS currently reserved" + return 1 + fi + pass=$(expr $pass + $scrubs) + done + if [ $pass = "0" ]; then + zerocount=$(expr $zerocount + 1) + fi + if [ $zerocount -gt $maxzerocount ]; then + break + fi + total=$(expr $total + $pass) + if [ $total -gt 0 ]; then + # already saw some reservations, so wait longer to avoid excessive over-counting. + # Note the loop itself takes about 2-3 seconds + sleep $(expr $SCRUB_SLEEP - 2) + else + sleep 0.5 + fi + done + + # Check that there are no more scrubs + for i in $(seq 0 5) + do + if ceph pg dump pgs | grep '+scrubbing'; then + echo "ERROR: Extra scrubs after test completion...not expected" + return 1 + fi + sleep $SCRUB_SLEEP + done + + echo $total total reservations seen + + # Sort of arbitraty number based on PGS * POOLS * POOL_SIZE as the number of total scrub + # reservations that must occur. However, the loop above might see the same reservation more + # than once. 
+ actual_reservations=$(expr $PGS \* $POOLS \* $POOL_SIZE) + if [ $total -lt $actual_reservations ]; then + echo "ERROR: Unexpectedly low amount of scrub reservations seen during test" + return 1 + fi + + return 0 +} + + +main osd-scrub-dump "$@" + +# Local Variables: +# compile-command: "cd build ; make check && \ +# ../qa/run-standalone.sh osd-scrub-dump.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh new file mode 100755 index 000000000..13b30360c --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -0,0 +1,6255 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +set -x +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +if [ `uname` = FreeBSD ]; then + # erasure coding overwrites are only tested on Bluestore + # erasure coding on filestore is unsafe + # http://docs.ceph.com/en/latest/rados/operations/erasure-code/#erasure-coding-with-overwrites + use_ec_overwrite=false +else + use_ec_overwrite=true +fi + +# Test development and debugging +# Set to "yes" in order to ignore diff errors and save results to update test +getjson="no" + +# Filter out mtime and local_mtime dates, version, prior_version and last_reqid (client) from any object_info. +jqfilter='def walk(f): + . as $in + | if type == "object" then + reduce keys[] as $key + ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f + elif type == "array" then map( walk(f) ) | f + else f + end; +walk(if type == "object" then del(.mtime) else . end) +| walk(if type == "object" then del(.local_mtime) else . end) +| walk(if type == "object" then del(.last_reqid) else . end) +| walk(if type == "object" then del(.version) else . end) +| walk(if type == "object" then del(.prior_version) else . 
end)' + +sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))' + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7107" # git grep '\<7107\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd-skip-data-digest=false " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function add_something() { + local dir=$1 + local poolname=$2 + local obj=${3:-SOMETHING} + local scrub=${4:-noscrub} + + if [ "$scrub" = "noscrub" ]; + then + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + else + ceph osd unset noscrub || return 1 + ceph osd unset nodeep-scrub || return 1 + fi + + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put $obj $dir/ORIGINAL || return 1 +} + +# +# Corrupt one copy of a replicated pool +# +function TEST_corrupt_and_repair_replicated() { + local dir=$1 + local poolname=rbd + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 + # Reproduces http://tracker.ceph.com/issues/8914 + corrupt_and_repair_one $dir $poolname $(get_primary $poolname SOMETHING) || return 1 +} + +# +# Allow repair to be scheduled when some recovering is still undergoing on the same OSD +# +function TEST_allow_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 +} + +# +# Skip non-repair scrub correctly during recovery +# +function TEST_skip_non_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 +} + +function scrub_and_not_schedule() { + local dir=$1 + local poolname=$2 + local osd=$3 + + # + # 1) start a non-repair scrub + # + local pg=$(get_pg $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + # + # 2) Assure the scrub is not scheduled + # + for ((i=0; i < 3; i++)); do + if test "$(get_last_scrub_stamp $pg)" 
'>' "$last_scrub" ; then + return 1 + fi + sleep 1 + done + + # + # 3) Access to the file must OK + # + objectstore_tool $dir $osd SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +function corrupt_and_repair_two() { + local dir=$1 + local poolname=$2 + local first=$3 + local second=$4 + + # + # 1) remove the corresponding file from the OSDs + # + pids="" + run_in_background pids objectstore_tool $dir $first SOMETHING remove + run_in_background pids objectstore_tool $dir $second SOMETHING remove + wait_background pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + # + # 2) repair the PG + # + local pg=$(get_pg $poolname SOMETHING) + repair $pg + # + # 3) The files must be back + # + pids="" + run_in_background pids objectstore_tool $dir $first SOMETHING list-attrs + run_in_background pids objectstore_tool $dir $second SOMETHING list-attrs + wait_background pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +# +# 1) add an object +# 2) remove the corresponding file from a designated OSD +# 3) repair the PG +# 4) check that the file has been restored in the designated OSD +# +function corrupt_and_repair_one() { + local dir=$1 + local poolname=$2 + local osd=$3 + + # + # 1) remove the corresponding file from the OSD + # + objectstore_tool $dir $osd SOMETHING remove || return 1 + # + # 2) repair the PG + # + local pg=$(get_pg $poolname SOMETHING) + repair $pg + # + # 3) The file must be back + # + objectstore_tool $dir $osd SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +function corrupt_and_repair_erasure_coded() { + local dir=$1 + local poolname=$2 + + add_something $dir $poolname || return 1 + + local primary=$(get_primary $poolname SOMETHING) + local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//")) + local not_primary_first=${osds[0]} + local not_primary_second=${osds[1]} + + # Reproduces http://tracker.ceph.com/issues/10017 + corrupt_and_repair_one $dir $poolname $primary || return 1 + # Reproduces http://tracker.ceph.com/issues/10409 + corrupt_and_repair_one $dir $poolname $not_primary_first || return 1 + corrupt_and_repair_two $dir $poolname $not_primary_first $not_primary_second || return 1 + corrupt_and_repair_two $dir $poolname $primary $not_primary_first || return 1 + +} + +function auto_repair_erasure_coded() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd-deep-scrub-interval=5 \ + --osd-scrub-max-interval=5 \ + --osd-scrub-min-interval=5 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + # Create an EC pool + create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || 
return 1 + # Wait for auto repair + local pgid=$(get_pg $poolname SOMETHING) + wait_for_scrub $pgid "$(get_last_scrub_stamp $pgid)" + wait_for_clean || return 1 + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +function TEST_auto_repair_erasure_coded_appends() { + auto_repair_erasure_coded $1 false +} + +function TEST_auto_repair_erasure_coded_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + auto_repair_erasure_coded $1 true + fi +} + +# initiate a scrub, then check for the (expected) 'scrubbing' and the +# (not expected until an error was identified) 'repair' +# Arguments: osd#, pg, sleep time +function initiate_and_fetch_state() { + local the_osd="osd.$1" + local pgid=$2 + local last_scrub=$(get_last_scrub_stamp $pgid) + + set_config "osd" "$1" "osd_scrub_sleep" "$3" + set_config "osd" "$1" "osd_scrub_auto_repair" "true" + + flush_pg_stats + date --rfc-3339=ns + + # note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one + env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid" + env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid" + + # wait for 'scrubbing' to appear + for ((i=0; i < 80; i++)); do + + st=`ceph pg $pgid query --format json | jq '.state' ` + echo $i ") state now: " $st + + case "$st" in + *scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this + *scrubbing* ) echo "found scrub"; return 0;; + *inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;; + *recovery* ) echo "Got here too late. Scrub has already finished."; return 1;; + * ) echo $st;; + esac + + if [ $((i % 10)) == 4 ]; then + echo "loop --------> " $i + fi + sleep 0.3 + done + + echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start" + return 1 +} + +function wait_end_of_scrub() { # osd# pg + local the_osd="osd.$1" + local pgid=$2 + + for ((i=0; i < 40; i++)); do + st=`ceph pg $pgid query --format json | jq '.state' ` + echo "wait-scrub-end state now: " $st + [[ $st =~ (.*scrubbing.*) ]] || break + if [ $((i % 5)) == 4 ] ; then + flush_pg_stats + fi + sleep 0.3 + done + + if [[ $st =~ (.*scrubbing.*) ]] + then + # a timeout + return 1 + fi + return 0 +} + + +function TEST_auto_repair_bluestore_tag() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 3 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. 
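+    # The args below enable scrub auto-repair and disable scrub-interval
+    # randomization, so the deep scrub triggered via initiate_and_fetch_state()
+    # behaves deterministically and repairs the replica removed further down.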
+ local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0 \ + --osd-op-queue=wpq" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1 + + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + echo "Affected PG " $pgid " w/ primary " $primary + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + initiate_and_fetch_state $primary $pgid "3.0" + r=$? + echo "initiate_and_fetch_state ret: " $r + set_config "osd" "$1" "osd_scrub_sleep" "0" + if [ $r -ne 0 ]; then + return 1 + fi + + wait_end_of_scrub "$primary" "$pgid" || return 1 + ceph pg dump pgs + + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log +} + + +function TEST_auto_repair_bluestore_basic() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1 + + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + ceph pg dump pgs + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log +} + +function TEST_auto_repair_bluestore_scrub() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0 \ + --osd-scrub-backoff-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || 
return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1 + + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid scrub + + # Wait for scrub -> auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + ceph pg dump pgs + # Actually this causes 2 scrubs, so we better wait a little longer + sleep 5 + wait_for_clean || return 1 + ceph pg dump pgs + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log + + # This should have caused 1 object to be repaired + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "1" || return 1 +} + +function TEST_auto_repair_bluestore_failed() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 10) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1 + # obj2 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1 + + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + flush_pg_stats + grep scrub_finish $dir/osd.${primary}.log + grep -q "scrub_finish.*still present after re-scrub" $dir/osd.${primary}.log || return 1 + ceph pg dump pgs + ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1 + + # Verify - obj1 should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname obj1) obj1 list-attrs || return 1 + rados --pool $poolname get obj1 $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log + + # Make it repairable + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 remove || return 1 + repair $pgid + sleep 2 + + flush_pg_stats + ceph pg dump pgs + ceph pg dump pgs | grep -q -e "^${pgid}.* active+clean " -e "^${pgid}.* active+clean+wait " || return 1 + grep scrub_finish 
$dir/osd.${primary}.log +} + +function TEST_auto_repair_bluestore_failed_norecov() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 10) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + # obj1 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj1 rm-attr _ || return 1 + # obj2 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1 + + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + flush_pg_stats + grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1 + ceph pg dump pgs + ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1 +} + +function TEST_repair_stats() { + local dir=$1 + local poolname=testpool + local OSDS=2 + local OBJS=30 + # This need to be an even number + local REPAIRS=20 + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 $(expr $OSDS - 1)) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 $OBJS) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + local other=$(get_not_primary $poolname obj1) + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + + kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1 + kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1 + for i in $(seq 1 $REPAIRS) + do + # Remove from both osd.0 and osd.1 + OSD=$(expr $i % 2) + _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 + done + activate_osd $dir $primary $ceph_osd_args || return 1 + activate_osd $dir $other $ceph_osd_args || return 1 + wait_for_clean || return 1 + + repair $pgid + wait_for_clean || return 1 + ceph pg dump pgs + flush_pg_stats + + # This should have caused 1 object to be repaired + ceph pg $pgid query | jq '.info.stats.stat_sum' + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$REPAIRS" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )" + 
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired") + test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired") + test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$REPAIRS" || return 1 +} + +function TEST_repair_stats_ec() { + local dir=$1 + local poolname=testpool + local OSDS=3 + local OBJS=30 + # This need to be an even number + local REPAIRS=26 + local allow_overwrites=false + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 $(expr $OSDS - 1)) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + # Create an EC pool + create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 $OBJS) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + local other=$(get_not_primary $poolname obj1) + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + + kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1 + kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1 + for i in $(seq 1 $REPAIRS) + do + # Remove from both osd.0 and osd.1 + OSD=$(expr $i % 2) + _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 + done + activate_osd $dir $primary $ceph_osd_args || return 1 + activate_osd $dir $other $ceph_osd_args || return 1 + wait_for_clean || return 1 + + repair $pgid + wait_for_clean || return 1 + ceph pg dump pgs + flush_pg_stats + + # This should have caused 1 object to be repaired + ceph pg $pgid query | jq '.info.stats.stat_sum' + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$REPAIRS" || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) ; do + if [ $osd = $other -o $osd = $primary ]; then + repair=$(expr $REPAIRS / 2) + else + repair="0" + fi + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired") + test "$COUNT" = "$repair" || return 1 + done + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$REPAIRS" || return 1 +} + +function corrupt_and_repair_jerasure() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 3) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1 + corrupt_and_repair_erasure_coded $dir $poolname || return 1 +} + +function TEST_corrupt_and_repair_jerasure_appends() { + corrupt_and_repair_jerasure $1 false +} + +function 
TEST_corrupt_and_repair_jerasure_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + corrupt_and_repair_jerasure $1 true + fi +} + +function corrupt_and_repair_lrc() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 9) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_ec_pool $poolname $allow_overwrites k=4 m=2 l=3 plugin=lrc || return 1 + corrupt_and_repair_erasure_coded $dir $poolname || return 1 +} + +function TEST_corrupt_and_repair_lrc_appends() { + corrupt_and_repair_lrc $1 false +} + +function TEST_corrupt_and_repair_lrc_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + corrupt_and_repair_lrc $1 true + fi +} + +function unfound_erasure_coded() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + local payload=ABCDEF + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 3) ; do + run_osd $dir $id || return 1 + done + + create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1 + + add_something $dir $poolname || return 1 + + local primary=$(get_primary $poolname SOMETHING) + local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//")) + local not_primary_first=${osds[0]} + local not_primary_second=${osds[1]} + local not_primary_third=${osds[2]} + + # + # 1) remove the corresponding file from the OSDs + # + pids="" + run_in_background pids objectstore_tool $dir $not_primary_first SOMETHING remove + run_in_background pids objectstore_tool $dir $not_primary_second SOMETHING remove + run_in_background pids objectstore_tool $dir $not_primary_third SOMETHING remove + wait_background pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + # + # 2) repair the PG + # + local pg=$(get_pg $poolname SOMETHING) + repair $pg + # + # 3) check pg state + # + # it may take a bit to appear due to mon/mgr asynchrony + for f in `seq 1 60`; do + ceph -s | grep "1/1 objects unfound" && break + sleep 1 + done + ceph -s|grep "4 up" || return 1 + ceph -s|grep "4 in" || return 1 + ceph -s|grep "1/1 objects unfound" || return 1 +} + +function TEST_unfound_erasure_coded_appends() { + unfound_erasure_coded $1 false +} + +function TEST_unfound_erasure_coded_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + unfound_erasure_coded $1 true + fi +} + +# +# list_missing for EC pool +# +function list_missing_erasure_coded() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 2) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1 + + # Put an object and remove the two shards (including primary) + add_something $dir $poolname MOBJ0 || return 1 + local -a osds0=($(get_osds $poolname MOBJ0)) + + # Put another object and remove two shards (excluding primary) + add_something $dir $poolname MOBJ1 || return 1 + local -a osds1=($(get_osds $poolname MOBJ1)) + + # Stop all osd daemons + for id in $(seq 0 2) ; do + kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1 + done + + id=${osds0[0]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ0 remove || return 1 + id=${osds0[1]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ0 remove || return 1 + + id=${osds1[1]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ1 remove || 
return 1 + id=${osds1[2]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ1 remove || return 1 + + for id in $(seq 0 2) ; do + activate_osd $dir $id >&2 || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + # Get get - both objects should in the same PG + local pg=$(get_pg $poolname MOBJ0) + + # Repair the PG, which triggers the recovering, + # and should mark the object as unfound + repair $pg + + for i in $(seq 0 120) ; do + [ $i -lt 60 ] || return 1 + matches=$(ceph pg $pg list_unfound | egrep "MOBJ0|MOBJ1" | wc -l) + [ $matches -eq 2 ] && break + done +} + +function TEST_list_missing_erasure_coded_appends() { + list_missing_erasure_coded $1 false +} + +function TEST_list_missing_erasure_coded_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + list_missing_erasure_coded $1 true + fi +} + +# +# Corrupt one copy of a replicated pool +# +function TEST_corrupt_scrub_replicated() { + local dir=$1 + local poolname=csr_pool + local total_objs=19 + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + add_something $dir $poolname $objname || return 1 + + rados --pool $poolname setomapheader $objname hdr-$objname || return 1 + rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + # Increase file 1 MB + 1KB + dd if=/dev/zero of=$dir/new.ROBJ19 bs=1024 count=1025 + rados --pool $poolname put $objname $dir/new.ROBJ19 || return 1 + rm -f $dir/new.ROBJ19 + + local pg=$(get_pg $poolname ROBJ0) + local primary=$(get_primary $poolname ROBJ0) + + # Compute an old omap digest and save oi + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) \ + config set osd_deep_scrub_update_digest_min_age 0 + CEPH_ARGS='' ceph daemon $(get_asok_path osd.1) \ + config set osd_deep_scrub_update_digest_min_age 0 + pg_deep_scrub $pg + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + + # Alternate corruption between osd.0 and osd.1 + local osd=$(expr $i % 2) + + case $i in + 1) + # Size (deep scrub data_digest too) + local payload=UVWXYZZZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 2) + # digest (deep scrub only) + local payload=UVWXYZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 3) + # missing + objectstore_tool $dir $osd $objname remove || return 1 + ;; + + 4) + # Modify omap value (deep scrub only) + objectstore_tool $dir $osd $objname set-omap key-$objname $dir/CORRUPT || return 1 + ;; + + 5) + # Delete omap key (deep scrub only) + objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1 + ;; + + 6) + # Add extra omap key (deep scrub only) + echo extra > $dir/extra-val + objectstore_tool $dir $osd $objname set-omap key2-$objname $dir/extra-val || return 1 + rm $dir/extra-val + ;; + + 7) + # Modify omap header (deep scrub only) + echo -n newheader > $dir/hdr + objectstore_tool $dir $osd $objname set-omaphdr $dir/hdr || return 1 + rm $dir/hdr + ;; + + 8) + rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1 + rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1 + + # Break xattrs + echo -n bad-val > $dir/bad-val + objectstore_tool 
$dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1 + objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1 + echo -n val3-$objname > $dir/newval + objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1 + rm $dir/bad-val $dir/newval + ;; + + 9) + objectstore_tool $dir $osd $objname get-attr _ > $dir/robj9-oi + echo -n D > $dir/change + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd $objname set-attr _ $dir/robj9-oi + rm $dir/oi $dir/change + ;; + + # ROBJ10 must be handled after digests are re-computed by a deep scrub below + # ROBJ11 must be handled with config change before deep scrub + # ROBJ12 must be handled with config change before scrubs + # ROBJ13 must be handled before scrubs + + 14) + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 0 $objname set-attr _ $dir/bad-val || return 1 + objectstore_tool $dir 1 $objname rm-attr _ || return 1 + rm $dir/bad-val + ;; + + 15) + objectstore_tool $dir $osd $objname rm-attr _ || return 1 + ;; + + 16) + objectstore_tool $dir 0 $objname rm-attr snapset || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1 + ;; + + 17) + # Deep-scrub only (all replicas are different from the object info) + local payload=ROBJ17 + echo $payload > $dir/new.ROBJ17 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1 + ;; + + 18) + # Deep-scrub only (all replicas are different from the object info) + local payload=ROBJ18 + echo $payload > $dir/new.ROBJ18 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1 + # Make one replica have a different object info, so a full repair must happen too + objectstore_tool $dir $osd $objname corrupt-info || return 1 + ;; + + 19) + # Set osd-max-object-size smaller than this object's size + + esac + done + + local pg=$(get_pg $poolname ROBJ0) + + ceph tell osd.\* injectargs -- --osd-max-object-size=1048576 + + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + pg_scrub $pg + + ERRORS=0 + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : object info inconsistent " + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head 
: candidate had a stat error" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : candidate size 1 info size 7 mismatch" + err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049713/1049720 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 8 inconsistent objects" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 18 errors" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:123a5f55:::ROBJ19:head : size 1049600 > 1048576 is too large" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ1" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ12", + "key": "", + "snapid": -2, + "hash": 3920199997, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'56", + "prior_version": "43'36", + "last_reqid": "osd.1.0:55", + "user_version": 36, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 36, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ12" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ13", + "key": "", + "snapid": -2, + "hash": 2682806379, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'59", + "prior_version": "45'39", + "last_reqid": "osd.1.0:58", + "user_version": 39, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + 
"flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x6441854d", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 39, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ13" + } + }, + { + "shards": [ + { + "object_info": "bad-val", + "size": 7, + "errors": [ + "info_corrupted" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "info_missing", + "info_corrupted" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ14" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "info_missing" + ], + "errors": [], + "object": { + "version": 45, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ15" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "errors": [ + "snapset_missing" + ], + "osd": 0, + "primary": false, + "size": 7 + }, + { + "errors": [ + "snapset_corrupted" + ], + "osd": 1, + "primary": true, + "snapset": "bad-val", + "size": 7 + } + ], + "union_shard_errors": [ + "snapset_missing", + "snapset_corrupted" + ] + }, + { + "errors": [ + "object_info_inconsistency" + ], + "object": { + "locator": "", + "name": "ROBJ18", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": 
"0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "shards": [ + { + "errors": [], + "object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "osd": 0, + "primary": false, + "size": 7 + }, + { + "errors": [], + "object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [] + }, + { + "object": { + "name": "ROBJ19", + "nspace": "", + "locator": "", + "snap": "head", + "version": 58 + }, + "errors": [ + "size_too_large" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ19", + "key": "", + "snapid": -2, + "hash": 2868534344, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "63'59", + "prior_version": "63'58", + "last_reqid": "osd.1.0:58", + "user_version": 58, + "size": 1049600, + "mtime": "2019-08-09T23:33:58.340709+0000", + "local_mtime": "2019-08-09T23:33:58.345676+0000", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x3dde0ef3", + "omap_digest": "0xbffddd28", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 1049600 + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 1049600 + } + ] + }, + { + "shards": [ + { + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ3", + "key": "", + "snapid": -2, + "hash": 625845583, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'61", + "prior_version": "25'9", + "last_reqid": "osd.1.0:60", + "user_version": 9, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 9, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val2-ROBJ8", + "name": "key2-ROBJ8" + } + ], + "size": 7, + "errors": [], + 
"osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-ROBJ8", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val3-ROBJ8", + "name": "key3-ROBJ8" + } + ], + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ8", + "key": "", + "snapid": -2, + "hash": 2359695969, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "79'66", + "prior_version": "79'65", + "last_reqid": "client.4554.0:1", + "user_version": 79, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 66, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ8" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "95'67", + "prior_version": "51'64", + "last_reqid": "client.4649.0:1", + "user_version": 80, + "size": 1, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2b63260d", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 1, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'64", + "prior_version": "37'27", + "last_reqid": "osd.1.0:63", + "user_version": 27, + "size": 7, + "mtime": "2018-04-05 14:33:25.352485", + "local_mtime": "2018-04-05 14:33:25.353746", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 1, + "errors": [ + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "95'67", + "prior_version": "51'64", + "last_reqid": "client.4649.0:1", + "user_version": 80, + "size": 1, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2b63260d", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "obj_size_info_mismatch" + ], + "errors": [ + "object_info_inconsistency" + ], + "object": { + "version": 67, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ9" + } + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c 
"$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + objname=ROBJ9 + # Change data and size again because digest was recomputed + echo -n ZZZ > $dir/change + rados --pool $poolname put $objname $dir/change + # Set one to an even older value + objectstore_tool $dir 0 $objname set-attr _ $dir/robj9-oi + rm $dir/oi $dir/change + + objname=ROBJ10 + objectstore_tool $dir 1 $objname get-attr _ > $dir/oi + rados --pool $poolname setomapval $objname key2-$objname val2-$objname + objectstore_tool $dir 0 $objname set-attr _ $dir/oi + objectstore_tool $dir 1 $objname set-attr _ $dir/oi + rm $dir/oi + + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + # ROBJ19 won't error this time + ceph tell osd.\* injectargs -- --osd-max-object-size=134217728 + + pg_deep_scrub $pg + + err_strings=() + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)], object info inconsistent " + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)]" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : failed to pick suitable auth object" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:87abbf36:::ROBJ11:head : candidate had a read error" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8aa5320e:::ROBJ17:head : 
data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8aa5320e:::ROBJ17:head : failed to pick suitable auth object" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0xefced57a != omap_digest 0x6a73cc07 from shard 1" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0x6a73cc07 != omap_digest 0xefced57a from auth oi 3:8b55fa4b:::ROBJ7:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [[]0 0 0[]][)]" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:a53c12e8:::ROBJ6:head : omap_digest 0x689ee887 != omap_digest 0x179c919f from shard 1, omap_digest 0x689ee887 != omap_digest 0x179c919f from auth oi 3:a53c12e8:::ROBJ6:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [[]0 0 0[]][)]" + err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]" + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:b1f19cbd:::ROBJ10:head : failed to pick suitable auth object" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error" + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info" + err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch" + err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" + err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:d60617f9:::ROBJ13:head : candidate had a read error" + err_strings[25]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 
3:d60617f9:::ROBJ13:head : candidate had a stat error" + err_strings[26]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:d60617f9:::ROBJ13:head : failed to pick suitable object info" + err_strings[27]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:e97ce31e:::ROBJ2:head : data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from shard 1, data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from auth oi 3:e97ce31e:::ROBJ2:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [[]0 0 0[]][)]" + err_strings[28]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing" + err_strings[29]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:f4981d31:::ROBJ4:head : omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from shard 1, omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from auth oi 3:f4981d31:::ROBJ4:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [[]0 0 0[]][)]" + err_strings[30]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x1a862a41 != omap_digest 0x6cac8f6 from shard 1" + err_strings[31]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x6cac8f6 != omap_digest 0x1a862a41 from auth oi 3:f4bfd4d1:::ROBJ5:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [[]0 0 0[]][)]" + err_strings[32]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : candidate size 3 info size 7 mismatch" + err_strings[33]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " + err_strings[34]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" + err_strings[35]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input" + err_strings[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049715/1049716 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[37]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 1 missing, 11 inconsistent objects" + err_strings[38]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 35 errors" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x2d4a11c2", + "omap_digest": "0xf5fba2c6", + "size": 9, + "errors": [ + "data_digest_mismatch_info", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "data_digest_mismatch_info", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "data_digest_mismatch", + "size_mismatch" + ], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ1" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa8dd5adc", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa8dd5adc", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 3174666125, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ10", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xc2025a24", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 30, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [], + "object": { + "version": 30, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ10" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa03cef03", + "size": 7, + "errors": [], + "osd": 0, + "primary": 
false + }, + { + "size": 7, + "errors": [ + "read_error" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ11", + "key": "", + "snapid": -2, + "hash": 1828574689, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'52", + "prior_version": "41'33", + "last_reqid": "osd.1.0:51", + "user_version": 33, + "size": 7, + "mtime": "2018-04-05 14:33:26.761286", + "local_mtime": "2018-04-05 14:33:26.762368", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa03cef03", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error" + ], + "errors": [], + "object": { + "version": 33, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ11" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ12", + "key": "", + "snapid": -2, + "hash": 3920199997, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'56", + "prior_version": "43'36", + "last_reqid": "osd.1.0:55", + "user_version": 36, + "size": 7, + "mtime": "2018-04-05 14:33:27.460958", + "local_mtime": "2018-04-05 14:33:27.462109", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 36, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ12" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "read_error" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "stat_error", + "read_error" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ13" + } + }, + { + "shards": [ + { + "object_info": "bad-val", + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x4f14f849", + "size": 7, + "errors": [ + "info_corrupted" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x4f14f849", + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "info_missing", + "info_corrupted" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ14" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + 
"expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "info_missing" + ], + "errors": [], + "object": { + "version": 45, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ15" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "errors": [ + "snapset_missing" + ], + "omap_digest": "0x8b699207", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "snapset": "bad-val", + "data_digest": "0x2ddbf8f5", + "errors": [ + "snapset_corrupted" + ], + "omap_digest": "0x8b699207", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "snapset_missing", + "snapset_corrupted" + ] + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ17", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1884071249, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ17", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xe9572720", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 51, + "watchers": {} + }, + "shards": [ + { + "data_digest": "0x5af0c3ef", + "errors": [ + "data_digest_mismatch_info" + ], + "omap_digest": "0xe9572720", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "data_digest": "0x5af0c3ef", + "errors": [ + "data_digest_mismatch_info" + ], + "omap_digest": "0xe9572720", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "data_digest_mismatch_info" + ] + }, + { + "errors": [ + "object_info_inconsistency" + ], + "object": { + "locator": "", + "name": "ROBJ18", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "shards": [ + { + "data_digest": "0xbd89c912", + 
"errors": [ + "data_digest_mismatch_info" + ], + "object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "omap_digest": "0xddc3680f", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "data_digest": "0xbd89c912", + "errors": [ + "data_digest_mismatch_info" + ], + "object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "omap_digest": "0xddc3680f", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "data_digest_mismatch_info" + ] + }, + { + "shards": [ + { + "data_digest": "0x578a4830", + "omap_digest": "0xf8e11918", + "size": 7, + "errors": [ + "data_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf8e11918", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ2", + "key": "", + "snapid": -2, + "hash": 2026323607, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'60", + "prior_version": "23'6", + "last_reqid": "osd.1.0:59", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:33:20.498756", + "local_mtime": "2018-04-05 14:33:20.499704", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf8e11918", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "data_digest_mismatch_info" + ], + "errors": [ + "data_digest_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ2" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ3", + "key": "", + "snapid": -2, + "hash": 625845583, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'61", + "prior_version": "25'9", + "last_reqid": "osd.1.0:60", + "user_version": 9, + "size": 7, + "mtime": "2018-04-05 14:33:21.189382", + "local_mtime": "2018-04-05 14:33:21.190446", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": 
[], + "object": { + "version": 9, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ3" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd7178dfe", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xe2d46ea4", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ4", + "key": "", + "snapid": -2, + "hash": 2360875311, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'62", + "prior_version": "27'12", + "last_reqid": "osd.1.0:61", + "user_version": 12, + "size": 7, + "mtime": "2018-04-05 14:33:21.862313", + "local_mtime": "2018-04-05 14:33:21.863261", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xe2d46ea4", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 12, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x1a862a41", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x06cac8f6", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ5", + "key": "", + "snapid": -2, + "hash": 2334915887, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'63", + "prior_version": "29'15", + "last_reqid": "osd.1.0:62", + "user_version": 15, + "size": 7, + "mtime": "2018-04-05 14:33:22.589300", + "local_mtime": "2018-04-05 14:33:22.590376", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x1a862a41", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 15, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ5" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x689ee887", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x179c919f", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ6", + "key": "", + "snapid": -2, + "hash": 390610085, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'54", + "prior_version": "31'18", + "last_reqid": "osd.1.0:53", + "user_version": 18, + "size": 7, + "mtime": "2018-04-05 14:33:23.289188", + "local_mtime": "2018-04-05 14:33:23.290130", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x179c919f", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + 
"union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 18, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ6" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xefced57a", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x6a73cc07", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ7", + "key": "", + "snapid": -2, + "hash": 3529485009, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'53", + "prior_version": "33'21", + "last_reqid": "osd.1.0:52", + "user_version": 21, + "size": 7, + "mtime": "2018-04-05 14:33:23.979658", + "local_mtime": "2018-04-05 14:33:23.980731", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xefced57a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 21, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ7" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val2-ROBJ8", + "name": "key2-ROBJ8" + } + ], + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-ROBJ8", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val3-ROBJ8", + "name": "key3-ROBJ8" + } + ], + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ8", + "key": "", + "snapid": -2, + "hash": 2359695969, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "79'66", + "prior_version": "79'65", + "last_reqid": "client.4554.0:1", + "user_version": 79, + "size": 7, + "mtime": "2018-04-05 14:34:05.598688", + "local_mtime": "2018-04-05 14:34:05.599698", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 66, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ8" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'64", + "prior_version": "37'27", + "last_reqid": "osd.1.0:63", + "user_version": 27, + "size": 7, + "mtime": "2018-04-05 14:33:25.352485", + "local_mtime": "2018-04-05 14:33:25.353746", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + 
"expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "size": 3, + "errors": [ + "obj_size_info_mismatch" + ], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "119'68", + "prior_version": "51'64", + "last_reqid": "client.4834.0:1", + "user_version": 81, + "size": 3, + "mtime": "2018-04-05 14:35:01.500659", + "local_mtime": "2018-04-05 14:35:01.502117", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "size": 3, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "119'68", + "prior_version": "51'64", + "last_reqid": "client.4834.0:1", + "user_version": 81, + "size": 3, + "mtime": "2018-04-05 14:35:01.500659", + "local_mtime": "2018-04-05 14:35:01.502117", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "obj_size_info_mismatch" + ], + "errors": [ + "object_info_inconsistency" + ], + "object": { + "version": 68, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ9" + } + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save2.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + repair $pg + wait_for_clean + + # This hangs if the repair doesn't work + timeout 30 rados -p $poolname get ROBJ17 $dir/robj17.out || return 1 + timeout 30 rados -p $poolname get ROBJ18 $dir/robj18.out || return 1 + # Even though we couldn't repair all of the introduced errors, we can fix ROBJ17 + diff -q $dir/new.ROBJ17 $dir/robj17.out || return 1 + rm -f $dir/new.ROBJ17 $dir/robj17.out || return 1 + diff -q $dir/new.ROBJ18 $dir/robj18.out || return 1 + rm -f $dir/new.ROBJ18 $dir/robj18.out || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + + +# +# Test scrub errors for an erasure coded pool +# +function corrupt_scrub_erasure() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + local total_objs=7 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 2) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + create_pool foo 1 + + create_ec_pool $poolname $allow_overwrites k=2 m=1 stripe_unit=2K --force || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=EOBJ${i} + add_something $dir $poolname $objname || return 1 + + local osd=$(expr $i % 2) + + case $i in + 1) + # Size (deep scrub data_digest too) + local payload=UVWXYZZZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 2) + # Corrupt EC shard + dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=1 + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 3) + # missing + objectstore_tool $dir $osd $objname remove || return 1 + ;; + + 4) + rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1 + rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1 + + # Break xattrs + echo -n bad-val > $dir/bad-val + objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1 + objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1 + echo -n val3-$objname > $dir/newval + objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1 + rm $dir/bad-val $dir/newval + ;; + + 5) + # Corrupt EC shard + dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=2 + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 6) + objectstore_tool $dir 0 $objname rm-attr hinfo_key || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr hinfo_key $dir/bad-val || return 1 + ;; + + 7) + local payload=MAKETHISDIFFERENTFROMOTHEROBJECTS + echo $payload > $dir/DIFFERENT + rados --pool $poolname put $objname $dir/DIFFERENT || return 1 + + # Get hinfo_key from EOBJ1 + objectstore_tool $dir 0 EOBJ1 get-attr hinfo_key > $dir/hinfo + objectstore_tool $dir 0 $objname set-attr hinfo_key $dir/hinfo || return 1 + rm -f $dir/hinfo + ;; + + esac + done + + local pg=$(get_pg $poolname EOBJ0) + + pg_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "shard": 0, + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4252.0:1", + "user_version": 3, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "attrs": [ + { + "Base64": false, + 
"value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ] + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ] + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4294.0:1", + "user_version": 6, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "shard": 0, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "EOBJ6", + "nspace": "", + "snap": "head", + "version": 8 + }, + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4418.0:1", + "user_version": 8, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": 
"0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "errors": [ + "hinfo_missing" + ], + "osd": 0, + "primary": false, + "shard": 2, + "size": 2048 + }, + { + "errors": [ + "hinfo_corrupted" + ], + "osd": 1, + "primary": true, + "shard": 0, + "hashinfo": "bad-val", + "size": 2048 + }, + { + "errors": [], + "osd": 2, + "primary": false, + "shard": 1, + "size": 2048, + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ], + "union_shard_errors": [ + "hinfo_missing", + "hinfo_corrupted" + ] + }, + { + "errors": [ + "hinfo_inconsistency" + ], + "object": { + "locator": "", + "name": "EOBJ7", + "nspace": "", + "snap": "head", + "version": 10 + }, + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, + "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4482.0:1", + "user_version": 10, + "size": 34, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 0, + "primary": false, + "shard": 2, + "size": 2048 + }, + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 1, + "primary": true, + "shard": 0, + "size": 2048 + }, + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 2, + "primary": false, + "shard": 1, + "size": 2048 + } + ], + "union_shard_errors": [] + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save3.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + pg_deep_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + if [ "$allow_overwrites" = "true" ] + then + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:31:33.837147", + "local_mtime": "2018-04-05 14:31:33.840763", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:31:33.837147", + "local_mtime": "2018-04-05 14:31:33.840763", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "shard": 0, + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4252.0:1", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:31:46.841145", + "local_mtime": "2018-04-05 14:31:46.844996", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + 
"version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4294.0:1", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:31:54.663622", + "local_mtime": "2018-04-05 14:31:54.664527", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:32:12.929161", + "local_mtime": "2018-04-05 14:32:12.934707", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:32:12.929161", + "local_mtime": "2018-04-05 14:32:12.934707", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + 
"omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "object": { + "name": "EOBJ6", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [], + "union_shard_errors": [ + "read_error", + "hinfo_missing", + "hinfo_corrupted" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4418.0:1", + "user_version": 8, + "size": 7, + "mtime": "2018-04-05 14:32:20.634116", + "local_mtime": "2018-04-05 14:32:20.637999", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "read_error", + "hinfo_missing" + ], + "size": 2048 + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "read_error", + "hinfo_corrupted" + ], + "size": 2048, + "hashinfo": "bad-val" + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + }, + { + "object": { + "name": "EOBJ7", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "hinfo_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, + "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4482.0:1", + "user_version": 10, + "size": 34, + "mtime": "2018-04-05 14:32:33.058782", + "local_mtime": "2018-04-05 14:32:33.059679", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 2, + "primary": false, + 
"shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + } + ], + "epoch": 0 +} +EOF + + else + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4192.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:30:10.688009", + "local_mtime": "2018-04-05 14:30:10.691774", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4192.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:30:10.688009", + "local_mtime": "2018-04-05 14:30:10.691774", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [ + "ec_hash_error" + ], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ2", + "key": "", + "snapid": -2, + "hash": 562812377, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "33'2", + "prior_version": "0'0", + "last_reqid": "client.4224.0:1", + "user_version": 2, + "size": 7, + "mtime": "2018-04-05 14:30:14.152945", + "local_mtime": "2018-04-05 14:30:14.154014", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + 
"type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "ec_hash_error" + ], + "errors": [], + "object": { + "version": 2, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ2" + } + }, + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "missing" + ] + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4258.0:1", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:30:18.875544", + "local_mtime": "2018-04-05 14:30:18.880153", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ] + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ] + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4296.0:1", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:30:22.271983", + "local_mtime": "2018-04-05 14:30:22.272840", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + 
"pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4384.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:30:35.162395", + "local_mtime": "2018-04-05 14:30:35.166390", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4384.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:30:35.162395", + "local_mtime": "2018-04-05 14:30:35.166390", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "object": { + "name": "EOBJ6", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [], + "union_shard_errors": [ + "read_error", + "hinfo_missing", + "hinfo_corrupted" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4420.0:1", + "user_version": 8, + "size": 7, + "mtime": "2018-04-05 14:30:40.914673", + "local_mtime": "2018-04-05 14:30:40.917705", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "read_error", + "hinfo_missing" + ], + "size": 2048 + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "read_error", + "hinfo_corrupted" + ], + "size": 2048, + "hashinfo": "bad-val" + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + }, + { + "object": { + "name": "EOBJ7", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "hinfo_inconsistency" + ], + "union_shard_errors": [ + "ec_hash_error" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, 
+ "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4486.0:1", + "user_version": 10, + "size": 34, + "mtime": "2018-04-05 14:30:50.995009", + "local_mtime": "2018-04-05 14:30:50.996112", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "ec_hash_error" + ], + "size": 2048, + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x5b7455a8", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x5b7455a8", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + } + ], + "epoch": 0 +} +EOF + + fi + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + if [ "$allow_overwrites" = "true" ] + then + num=4 + else + num=5 + fi + jq '.' $dir/json > save${num}.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + +function TEST_corrupt_scrub_erasure_appends() { + corrupt_scrub_erasure $1 false +} + +function TEST_corrupt_scrub_erasure_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + corrupt_scrub_erasure $1 true + fi +} + +# +# Test to make sure that a periodic scrub won't cause deep-scrub info to be lost +# +function TEST_periodic_scrub_replicated() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" + run_osd $dir 0 $ceph_osd_args || return 1 + run_osd $dir 1 $ceph_osd_args || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + local osd=0 + add_something $dir $poolname $objname scrub || return 1 + local primary=$(get_primary $poolname $objname) + local pg=$(get_pg $poolname $objname) + + # Add deep-scrub only error + local payload=UVWXYZ + echo $payload > $dir/CORRUPT + # Uses $ceph_osd_args for osd restart + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + + # No scrub information available, so expect failure + set -o pipefail + ! 
rados list-inconsistent-obj $pg | jq '.' || return 1 + set +o pipefail + + pg_deep_scrub $pg || return 1 + + # Make sure bad object found + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + flush_pg_stats + local last_scrub=$(get_last_scrub_stamp $pg) + # Fake a schedule scrub + ceph tell $pg scrub || return 1 + # Wait for schedule regular scrub + wait_for_scrub $pg "$last_scrub" + + # It needed to be upgraded + grep -q "Deep scrub errors, upgrading scrub to deep-scrub" $dir/osd.${primary}.log || return 1 + + # Bad object still known + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + # Can't upgrade with this set + ceph osd set nodeep-scrub + # Let map change propagate to OSDs + ceph tell osd.0 get_latest_osdmap + flush_pg_stats + sleep 5 + + # Fake a schedule scrub + ceph tell $pg scrub || return 1 + # Wait for schedule regular scrub + # to notice scrub and skip it + local found=false + for i in $(seq 14 -1 0) + do + sleep 1 + ! grep -q "Regular scrub skipped due to deep-scrub errors and nodeep-scrub set" $dir/osd.${primary}.log || { found=true ; break; } + echo Time left: $i seconds + done + test $found = "true" || return 1 + + # Bad object still known + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + flush_pg_stats + # Request a regular scrub and it will be done + pg_scrub $pg + grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1 + + # deep-scrub error is no longer present + rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1 +} + +function TEST_scrub_warning() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local scrubs=5 + local deep_scrubs=5 + local i1_day=86400 + local i7_days=$(calc $i1_day \* 7) + local i14_days=$(calc $i1_day \* 14) + local overdue=0.5 + local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) ) + local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) ) + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1 + run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1 + + for i in $(seq 1 $(expr $scrubs + $deep_scrubs)) + do + create_pool $poolname-$i 1 1 || return 1 + wait_for_clean || return 1 + if [ $i = "1" ]; + then + ceph osd pool set $poolname-$i scrub_max_interval $i14_days + fi + if [ $i = $(expr $scrubs + 1) ]; + then + ceph osd pool set $poolname-$i deep_scrub_interval $i14_days + fi + done + + # Only 1 osd + local primary=0 + + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + ceph config set global osd_scrub_interval_randomize_ratio 0 + ceph config set global osd_deep_scrub_randomize_ratio 0 + ceph config set global osd_scrub_max_interval ${i7_days} + ceph config set global osd_deep_scrub_interval ${i7_days} + + # Fake schedule scrubs + for i in $(seq 1 $scrubs) + do + if [ $i = "1" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + ceph tell ${i}.0 scrub $(expr ${overdue_seconds} + ${i}00) || return 1 + done + # Fake schedule deep scrubs + for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs)) + do + if [ $i = "$(expr $scrubs + 1)" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + ceph tell ${i}.0 deep_scrub $(expr 
${overdue_seconds} + ${i}00) || return 1 + done + flush_pg_stats + + ceph health + ceph health detail + ceph health | grep -q " pgs not deep-scrubbed in time" || return 1 + ceph health | grep -q " pgs not scrubbed in time" || return 1 + + # note that the 'ceph tell pg deep_scrub' command now also sets the regular scrub + # time-stamp. I.e. - all 'late for deep scrubbing' pgs are also late for + # regular scrubbing. For now, we'll allow both responses. + COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l) + + if (( $COUNT != $scrubs && $COUNT != $(expr $scrubs+$deep_scrubs) )); then + ceph health detail | grep "not scrubbed since" + return 1 + fi + COUNT=$(ceph health detail | grep "not deep-scrubbed since" | wc -l) + if [ "$COUNT" != $deep_scrubs ]; then + ceph health detail | grep "not deep-scrubbed since" + return 1 + fi +} + +# +# Corrupt snapset in replicated pool +# +function TEST_corrupt_snapset_scrub_rep() { + local dir=$1 + local poolname=csr_pool + local total_objs=2 + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + add_something $dir $poolname $objname || return 1 + + rados --pool $poolname setomapheader $objname hdr-$objname || return 1 + rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + local pg=$(get_pg $poolname ROBJ0) + local primary=$(get_primary $poolname ROBJ0) + + rados -p $poolname mksnap snap1 + echo -n head_of_snapshot_data > $dir/change + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + + # Alternate corruption between osd.0 and osd.1 + local osd=$(expr $i % 2) + + case $i in + 1) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1 + ;; + + 2) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1 + ;; + + esac + done + rm $dir/change + + pg_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 34, + "inconsistents": [ + { + "object": { + "name": "ROBJ1", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "24'8", + "prior_version": "21'3", + "last_reqid": "client.4195.0:1", + "user_version": 8, + "size": 21, + "mtime": "2018-04-05 14:35:43.286117", + "local_mtime": "2018-04-05 14:35:43.288990", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x53acb008", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": { + "clones": [ + { + "overlap": "[]", + "size": 7, + "snap": 1, + "snaps": [ + 1 + ] + } + ], + "seq": 1 + } + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": { + "clones": [], + "seq": 0 + } + } + ] + }, + { + "object": { + "name": "ROBJ2", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ2", + "key": "", + "snapid": -2, + "hash": 2026323607, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "28'10", + "prior_version": "23'6", + "last_reqid": "client.4223.0:1", + "user_version": 10, + "size": 21, + "mtime": "2018-04-05 14:35:48.326856", + "local_mtime": "2018-04-05 14:35:48.328097", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x53acb008", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": { + "clones": [], + "seq": 0 + } + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": { + "clones": [ + { + "overlap": "[]", + "size": 7, + "snap": 1, + "snaps": [ + 1 + ] + } + ], + "seq": 1 + } + } + ] + } + ] +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save6.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + ERRORS=0 + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ1:head : snapset inconsistent" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ2:head : snapset inconsistent" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*:.*:::ROBJ1:1 : is an unexpected clone" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 3/4 objects, 1/2 clones, 3/4 dirty, 3/4 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 49/56 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 0 missing, 2 inconsistent objects" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 4 errors" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + +function TEST_request_scrub_priority() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local OBJECTS=64 + local PGS=8 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" + run_osd $dir 0 $ceph_osd_args || return 1 + + create_pool $poolname $PGS $PGS || return 1 + wait_for_clean || return 1 + + local osd=0 + add_something $dir $poolname $objname noscrub || return 1 + local primary=$(get_primary $poolname $objname) + local pg=$(get_pg $poolname $objname) + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + local otherpgs + for i in $(seq 0 $(expr $PGS - 1)) + do + opg="${poolid}.${i}" + if [ "$opg" = "$pg" ]; then + continue + fi + otherpgs="${otherpgs}${opg} " + local other_last_scrub=$(get_last_scrub_stamp $pg) + # Fake a schedule scrub + ceph tell $opg scrub $opg || return 1 + done + + sleep 15 + flush_pg_stats + + # Request a regular scrub and it will be done + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + ceph osd unset noscrub || return 1 + ceph osd unset nodeep-scrub || return 1 + + wait_for_scrub $pg "$last_scrub" + + for opg in $otherpgs $pg + do + wait_for_scrub $opg "$other_last_scrub" + done + + # Verify that the requested scrub ran first + grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1 +} + + +main osd-scrub-repair "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-repair.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-snaps.sh b/qa/standalone/scrub/osd-scrub-snaps.sh new file mode 100755 index 000000000..c543b48a1 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-snaps.sh @@ -0,0 +1,1188 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can 
redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +# Test development and debugging +# Set to "yes" in order to ignore diff errors and save results to update test +getjson="no" + +jqfilter='.inconsistents' +sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print ( json.dumps(ud, sort_keys=True, indent=2) )' + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function create_scenario() { + local dir=$1 + local poolname=$2 + local TESTDATA=$3 + local osd=$4 + + SNAP=1 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj1 $TESTDATA + rados -p $poolname put obj5 $TESTDATA + rados -p $poolname put obj3 $TESTDATA + for i in `seq 6 14` + do rados -p $poolname put obj${i} $TESTDATA + done + + SNAP=2 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + + SNAP=3 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj3 $TESTDATA + + SNAP=4 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + rados -p $poolname put obj2 $TESTDATA + + SNAP=5 + rados -p $poolname mksnap snap${SNAP} + SNAP=6 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + + SNAP=7 + rados -p $poolname mksnap snap${SNAP} + + rados -p $poolname rm obj4 + rados -p $poolname rm obj16 + rados -p $poolname rm obj2 + + kill_daemons $dir TERM osd || return 1 + + # Don't need to use ceph_objectstore_tool() function because osd stopped + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj1)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" --force remove || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":2)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)" + OBJ5SAVE="$JSON" + # Starts with a snapmap + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 + ceph-objectstore-tool --data-path $dir/${osd} --rmtype nosnapmap "$JSON" remove || return 1 + # Check that snapmap is stil there + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 + rm -f 
$dir/drk.log + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=18 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj3)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=15 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj4 | grep \"snapid\":7)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1 + + # Starts with a snapmap + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj16 | grep \"snapid\":7)" + ceph-objectstore-tool --data-path $dir/${osd} --rmtype snapmap "$JSON" remove || return 1 + # Check that snapmap is now removed + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + ! grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 + rm -f $dir/drk.log + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" rm-attr snapset || return 1 + + # Create a clone which isn't in snapset and doesn't have object info + JSON="$(echo "$OBJ5SAVE" | sed s/snapid\":1/snapid\":7/)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=7 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj6)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj7)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset corrupt || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj8)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset seq || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj9)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_size || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj10)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_overlap || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj11)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clones || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj12)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset head || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj13)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset snaps || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj14)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset size || return 1 + + echo "garbage" > $dir/bad + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj15)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-attr snapset $dir/bad || return 1 + rm -f $dir/bad + return 0 +} + +function TEST_scrub_snaps() { + local dir=$1 + local poolname=test + local OBJS=16 + local OSDS=1 + + 
TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # All scrubs done manually. Don't want any unexpected scheduled scrubs. + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $OBJS` + do + rados -p $poolname put obj${i} $TESTDATA + done + + local primary=$(get_primary $poolname obj1) + + create_scenario $dir $poolname $TESTDATA $primary || return 1 + + rm -f $TESTDATA + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + activate_osd $dir $osd || return 1 + done + ceph tell osd.* config set osd_shallow_scrub_chunk_max 25 + ceph tell osd.* config set osd_shallow_scrub_chunk_min 5 + ceph tell osd.* config set osd_pg_stat_report_interval_max 1 + + + wait_for_clean || return 1 + + ceph tell osd.* config get osd_shallow_scrub_chunk_max + ceph tell osd.* config get osd_shallow_scrub_chunk_min + ceph tell osd.* config get osd_pg_stat_report_interval_max + ceph tell osd.* config get osd_scrub_chunk_max + ceph tell osd.* config get osd_scrub_chunk_min + + local pgid="${poolid}.0" + if ! pg_scrub "$pgid" ; then + return 1 + fi + + test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" = "2" || return 1 + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. | length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pgid || return 1 + + rados list-inconsistent-obj $pgid > $dir/json || return 1 + + # The injected snapshot errors with a single copy pool doesn't + # see object errors because all the issues are detected by + # comparing copies. 
+ jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 17, + "inconsistents": [] +} +EOF + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + + rados list-inconsistent-snapset $pgid > $dir/json || return 1 + + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj1" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj10" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj11" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj14" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj6" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj7" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj9" + }, + { + "errors": [ + "headless" + ], + "snap": 4, + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 4, + "locator": "", + "nspace": "", + "name": "obj5" + }, + { + "errors": [ + "headless" + ], + "snap": 7, + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "info_missing", + "headless" + ], + "snap": 7, + "locator": "", + "nspace": "", + "name": "obj5" + }, + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "????", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj11", + "snapset": { + "seq": 1, + "clones": [] + } + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1033, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "errors": [ + "snapset_corrupted" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj15" + }, + { + "extra clones": [ + 7, + 4 + ], + "errors": [ + "snapset_missing", + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj3", + "snapset": { + "seq": 3, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 3, + "size": 256, + "overlap": "[]", + "snaps": [ + 3, + 2 + ] + } + ] + } + }, + { + "missing": [ + 7 + ], + "errors": [ + "clone_missing" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj4", + "snapset": { + "seq": 7, + "clones": [ + { + "snap": 7, + "size": 1032, + "overlap": "[]", + "snaps": [ + 7, + 6, + 5, + 4, + 3, + 2, + 1 + ] + } + ] + } + }, + { + "missing": [ + 2, + 1 + ], + "extra clones": [ + 7 + ], + "errors": [ + "extra_clones", + "clone_missing" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj5", + "snapset": { + "seq": 6, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 2, + "size": 256, + "overlap": "[]", + "snaps": [ + 2 + ] + }, + { + "snap": 4, + "size": 
512, + "overlap": "[]", + "snaps": [ + 4, + 3 + ] + }, + { + "snap": 6, + "size": 1024, + "overlap": "[]", + "snaps": [ + 6, + 5 + ] + } + ] + } + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj6", + "snapset": { + "seq": 1, + "clones": [] + } + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj7", + "snapset": { + "seq": 0, + "clones": [] + } + }, + { + "errors": [ + "snapset_error" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj8", + "snapset": { + "seq": 0, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + } + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": "????", + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + } + ], + "epoch": 20 +} +EOF + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1 + fi + + pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid') + pids="" + for pidfile in ${pidfiles} + do + pids+="$(cat $pidfile) " + done + + ERRORS=0 + + for i in `seq 1 7` + do + rados -p $poolname rmsnap snap$i + done + sleep 5 + local -i loop=0 + while ceph pg dump pgs | grep -q snaptrim; + do + if ceph pg dump pgs | grep -q snaptrim_error; + then + break + fi + sleep 2 + loop+=1 + if (( $loop >= 10 )) ; then + ERRORS=$(expr $ERRORS + 1) + break + fi + done + ceph pg dump pgs + + for pid in $pids + do + if ! 
kill -0 $pid + then + echo "OSD Crash occurred" + ERRORS=$(expr $ERRORS + 1) + fi + done + + kill_daemons $dir || return 1 + + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* : is missing in clone_overlap" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : no '_' attr" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : is an unexpected clone" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 : on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:2" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:1" + err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head : 2 missing clone[(]s[)]" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head : snaps.seq not set" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 : is an unexpected clone" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head : on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 : is an unexpected clone" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head : no 'snapset' attr" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 : clone ignored due to missing snapset" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 : clone ignored due to missing snapset" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head : expected clone .*:::obj4:7" + err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head : 1 missing clone[(]s[)]" + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 : is an unexpected clone" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 : is missing in clone_size" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 : is an unexpected clone" + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 : size 1032 != clone_size 1033" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 20 errors" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head : can't decode 'snapset' attr " + err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired" + + for err_string in "${err_strings[@]}" + do + if ! 
grep "$err_string" $dir/osd.${primary}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +function _scrub_snaps_multi() { + local dir=$1 + local poolname=test + local OBJS=16 + local OSDS=2 + local which=$2 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # All scrubs done manually. Don't want any unexpected scheduled scrubs. + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $OBJS` + do + rados -p $poolname put obj${i} $TESTDATA + done + + local primary=$(get_primary $poolname obj1) + local replica=$(get_not_primary $poolname obj1) + + eval create_scenario $dir $poolname $TESTDATA \$$which || return 1 + + rm -f $TESTDATA + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + activate_osd $dir $osd || return 1 + done + + ceph tell osd.* config set osd_shallow_scrub_chunk_max 3 + ceph tell osd.* config set osd_shallow_scrub_chunk_min 3 + ceph tell osd.* config set osd_scrub_chunk_min 3 + ceph tell osd.* config set osd_pg_stat_report_interval_max 1 + wait_for_clean || return 1 + + local pgid="${poolid}.0" + if ! pg_scrub "$pgid" ; then + return 1 + fi + + test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" -gt "3" || return 1 + test "$(grep "_scan_snaps start" $dir/osd.${replica}.log | wc -l)" -gt "3" || return 1 + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pgid || return 1 + + rados list-inconsistent-obj $pgid --format=json-pretty + + rados list-inconsistent-snapset $pgid > $dir/json || return 1 + + # Since all of the snapshots on the primary is consistent there are no errors here + if [ $which = "replica" ]; + then + scruberrors="20" + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 23, + "inconsistents": [] +} +EOF + +else + scruberrors="30" + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 23, + "inconsistents": [ + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj11", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj6", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj7", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj5", + "nspace": "", + "locator": "", + "snap": 7, + "errors": [ + "info_missing", + "headless" + ] + }, + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "????", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "name": "obj11", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1033, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "name": "obj5", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 6, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 2, + "size": 256, + "overlap": "[]", + "snaps": [ + 2 + ] + }, + { + "snap": 4, + "size": 512, + "overlap": "[]", + "snaps": [ + 4, + 3 + ] + }, + { + "snap": 6, + "size": 1024, + "overlap": "[]", + "snaps": [ + 6, + 5 + ] + } + ] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 7 + ] + }, + { + "name": "obj6", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj7", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 0, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj8", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 0, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [ + "snapset_error" + ] + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": "????", + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + } + ] +} +EOF +fi + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson 
= "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1 + fi + + pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid') + pids="" + for pidfile in ${pidfiles} + do + pids+="$(cat $pidfile) " + done + + ERRORS=0 + + # When removing snapshots with a corrupt replica, it crashes. + # See http://tracker.ceph.com/issues/23875 + if [ $which = "primary" ]; + then + for i in `seq 1 7` + do + rados -p $poolname rmsnap snap$i + done + sleep 5 + local -i loop=0 + while ceph pg dump pgs | grep -q snaptrim; + do + if ceph pg dump pgs | grep -q snaptrim_error; + then + break + fi + sleep 2 + loop+=1 + if (( $loop >= 10 )) ; then + ERRORS=$(expr $ERRORS + 1) + break + fi + done + fi + ceph pg dump pgs + + for pid in $pids + do + if ! kill -0 $pid + then + echo "OSD Crash occurred" + ERRORS=$(expr $ERRORS + 1) + fi + done + + kill_daemons $dir || return 1 + + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj4:7 : missing" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj3:head : size 3840 != size 768 from auth oi" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:1 : missing" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:2 : missing" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj5:4 : size 4608 != size 512 from auth oi" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7 : failed to pick suitable object info" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj1:head : missing" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors" + + for err_string in "${err_strings[@]}" + do + if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + # Check replica specific messages + declare -a rep_err_strings + osd=$(eval echo \$$which) + rep_err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired" + for err_string in "${rep_err_strings[@]}" + do + if ! grep "$err_string" $dir/osd.${osd}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +function TEST_scrub_snaps_replica() { + local dir=$1 + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1" + _scrub_snaps_multi $dir replica + err=$? + CEPH_ARGS=$ORIG_ARGS + return $err +} + +function TEST_scrub_snaps_primary() { + local dir=$1 + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1" + _scrub_snaps_multi $dir primary + err=$? 
+ CEPH_ARGS=$ORIG_ARGS + return $err +} + +main osd-scrub-snaps "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-snaps.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh new file mode 100755 index 000000000..73f165380 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -0,0 +1,664 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7138" # git grep '\<7138\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_scrub_test() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + if [ "$otherosd" = "2" ]; + then + local anotherosd="0" + else + local anotherosd="2" + fi + + objectstore_tool $dir $anotherosd obj1 set-bytes /etc/fstab + + local pgid="${poolid}.0" + pg_deep_scrub "$pgid" || return 1 + + ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1 + test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1 + + ceph osd out $primary + wait_for_clean || return 1 + + pg_deep_scrub "$pgid" || return 1 + + test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1 + + ceph osd in $primary + wait_for_clean || return 1 + + repair "$pgid" || return 1 + wait_for_clean || return 1 + + # This sets up the test after we've repaired with previous primary has old value + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1 + + ceph osd out $primary + wait_for_clean || return 1 + + test "$(ceph pg $pgid query | jq 
'.info.stats.stat_sum.num_scrub_errors')" = "0" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "0" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')" = "0" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1 +} + +# Grab year-month-day +DATESED="s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/" +DATEFORMAT="%Y-%m-%d" + +function check_dump_scrubs() { + local primary=$1 + local sched_time_check="$2" + local deadline_check="$3" + + DS="$(CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) dump_scrubs)" + # use eval to drop double-quotes + eval SCHED_TIME=$(echo $DS | jq '.[0].sched_time') + test $(echo $SCHED_TIME | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $sched_time_check") || return 1 + # use eval to drop double-quotes + eval DEADLINE=$(echo $DS | jq '.[0].deadline') + test $(echo $DEADLINE | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $deadline_check") || return 1 +} + +function TEST_interval_changes() { + local poolname=test + local OSDS=2 + local objects=10 + # Don't assume how internal defaults are set + local day="$(expr 24 \* 60 \* 60)" + local week="$(expr $day \* 7)" + local min_interval=$day + local max_interval=$week + local WAIT_FOR_UPDATE=15 + + TESTDATA="testdata.$$" + + # This min scrub interval results in 30 seconds backoff time + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_min_interval=$min_interval --osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + local poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + + # Check initial settings from above (min 1 day, min 1 week) + check_dump_scrubs $primary "1 day" "1 week" || return 1 + + # Change global osd_scrub_min_interval to 2 days + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $(expr $day \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "1 week" || return 1 + + # Change global osd_scrub_max_interval to 2 weeks + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $(expr $week \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "2 week" || return 1 + + # Change pool osd_scrub_min_interval to 3 days + ceph osd pool set $poolname scrub_min_interval $(expr $day \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "2 week" || return 1 + + # Change pool osd_scrub_max_interval to 3 weeks + ceph osd pool set $poolname scrub_max_interval $(expr $week \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "3 week" || return 1 +} + +function TEST_scrub_extended_sleep() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + DAY=$(date +%w) + # Handle wrap + if [ "$DAY" -ge "4" ]; + then + DAY="0" + fi + # Start after 2 days in case we are near midnight + DAY_START=$(expr $DAY + 2) + DAY_END=$(expr $DAY + 3) + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in 
$(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_sleep=0 \ + --osd_scrub_extended_sleep=20 \ + --bluestore_cache_autotune=false \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_begin_week_day=$DAY_START \ + --osd_scrub_end_week_day=$DAY_END \ + || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + + # Trigger a scrub on a PG + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph tell $pgid scrub || return 1 + + # Allow scrub to start extended sleep + PASSED="false" + for ((i=0; i < 15; i++)); do + if grep -q "scrub state.*, sleeping" $dir/osd.${primary}.log + then + PASSED="true" + break + fi + sleep 1 + done + + # Check that extended sleep was triggered + if [ $PASSED = "false" ]; + then + return 1 + fi + + # release scrub to run after extended sleep finishes + ceph tell osd.$primary config set osd_scrub_begin_week_day 0 + ceph tell osd.$primary config set osd_scrub_end_week_day 0 + + # Due to extended sleep, the scrub should not be done within 20 seconds + # but test up to 10 seconds and make sure it happens by 25 seconds. + count=0 + PASSED="false" + for ((i=0; i < 25; i++)); do + count=$(expr $count + 1) + if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then + # Did scrub run too soon? + if [ $count -lt "10" ]; + then + return 1 + fi + PASSED="true" + break + fi + sleep 1 + done + + # Make sure scrub eventually ran + if [ $PASSED = "false" ]; + then + return 1 + fi +} + +function _scrub_abort() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=1000 + local type=$2 + + TESTDATA="testdata.$$" + if test $type = "scrub"; + then + stopscrub="noscrub" + check="noscrub" + else + stopscrub="nodeep-scrub" + check="nodeep_scrub" + fi + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + # Set scheduler to "wpq" until there's a reliable way to query scrub + # states with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" + # overrides the scrub sleep to 0 and as a result the checks in the + # test fail. 
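+ # Note: the relatively long osd_scrub_sleep (5s) set below keeps the scrub in progress long enough for the test to observe it and then abort it; the sleep is lowered to 0.1s later in the test so the restarted scrub can complete quickly.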
+ run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_sleep=5.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_op_queue=wpq || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + local pgid="${poolid}.0" + + ceph tell $pgid $type || return 1 + # deep-scrub won't start without scrub noticing + if [ "$type" = "deep_scrub" ]; + then + ceph tell $pgid scrub || return 1 + fi + + # Wait for scrubbing to start + set -o pipefail + found="no" + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing" + then + found="yes" + #ceph pg dump pgs + break + fi + done + set +o pipefail + + if test $found = "no"; + then + echo "Scrubbing never started" + return 1 + fi + + ceph osd set $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd set noscrub + fi + + # Wait for scrubbing to end + set -o pipefail + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing" + then + continue + fi + #ceph pg dump pgs + break + done + set +o pipefail + + sleep 5 + + if ! grep "$check set, aborting" $dir/osd.${primary}.log + then + echo "Abort not seen in log" + return 1 + fi + + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph config set osd "osd_scrub_sleep" "0.1" + + ceph osd unset $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd unset noscrub + fi + TIMEOUT=$(($objects / 2)) + wait_for_scrub $pgid "$last_scrub" || return 1 +} + +function TEST_scrub_abort() { + local dir=$1 + _scrub_abort $dir scrub +} + +function TEST_deep_scrub_abort() { + local dir=$1 + _scrub_abort $dir deep_scrub +} + +function TEST_scrub_permit_time() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//') + local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//') + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --bluestore_cache_autotune=false \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_begin_hour=$scrub_begin_hour \ + --osd_scrub_end_hour=$scrub_end_hour || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + + # Trigger a scrub on a PG + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pgid) + # If we don't specify an amount of time to subtract from + # current time to set last_scrub_stamp, it sets the deadline + # back by osd_max_interval which would cause the time permit checking + # to be skipped. Set back 1 day, the default scrub_min_interval. + ceph tell $pgid scrub $(( 24 * 60 * 60 )) || return 1 + + # Scrub should not run + for ((i=0; i < 30; i++)); do + if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then + return 1 + fi + sleep 1 + done +} + +# a test to recreate the problem described in bug #52901 - setting 'noscrub' +# without explicitly preventing deep scrubs made the PG 'unscrubable'. 
+# Fixed by PR#43521 +function TEST_just_deep_scrubs() { + local dir=$1 + local -A cluster_conf=( + ['osds_num']="3" + ['pgs_in_pool']="4" + ['pool_name']="test" + ) + + standard_scrub_cluster $dir cluster_conf + local poolid=${cluster_conf['pool_id']} + local poolname=${cluster_conf['pool_name']} + echo "Pool: $poolname : $poolid" + + TESTDATA="testdata.$$" + local objects=15 + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # set both 'no scrub' & 'no deep-scrub', then request a deep-scrub. + # we do not expect to see the scrub scheduled. + + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + sleep 6 # the 'noscrub' command takes a long time to reach the OSDs + local now_is=`date -I"ns"` + declare -A sched_data + local pgid="${poolid}.2" + + # turn on the publishing of test data in the 'scrubber' section of 'pg query' output + set_query_debug $pgid + + extract_published_sch $pgid $now_is $now_is sched_data + local saved_last_stamp=${sched_data['query_last_stamp']} + local dbg_counter_at_start=${sched_data['query_scrub_seq']} + echo "test counter @ start: $dbg_counter_at_start" + + ceph pg $pgid deep_scrub + + sleep 5 # 5s is the 'pg dump' interval + declare -A sc_data_2 + extract_published_sch $pgid $now_is $now_is sc_data_2 + echo "test counter @ should show no change: " ${sc_data_2['query_scrub_seq']} + (( ${sc_data_2['dmp_last_duration']} == 0)) || return 1 + (( ${sc_data_2['query_scrub_seq']} == $dbg_counter_at_start)) || return 1 + + # unset the 'no deep-scrub'. Deep scrubbing should start now. + ceph osd unset nodeep-scrub || return 1 + sleep 5 + declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" ) + sc_data_2=() + echo "test counter @ should be higher than before the unset: " ${sc_data_2['query_scrub_seq']} + wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sc_data_2 || return 1 +} + +function TEST_dump_scrub_schedule() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_backoff_ratio=0.0 \ + --osd_op_queue=wpq \ + --osd_scrub_sleep=0.2" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd $ceph_osd_args|| return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local pgid="${poolid}.0" + local now_is=`date -I"ns"` + + # before the scrubbing starts + + # last scrub duration should be 0. The scheduling data should show + # a time in the future: + # e.g. 
'periodic scrub scheduled @ 2021-10-12T20:32:43.645168+0000' + + declare -A expct_starting=( ['query_active']="false" ['query_is_future']="true" ['query_schedule']="scrub scheduled" ) + declare -A sched_data + extract_published_sch $pgid $now_is "2019-10-12T20:32:43.645168+0000" sched_data + schedule_against_expected sched_data expct_starting "initial" + (( ${sched_data['dmp_last_duration']} == 0)) || return 1 + echo "last-scrub --- " ${sched_data['query_last_scrub']} + + # + # step 1: scrub once (mainly to ensure there is no urgency to scrub) + # + + saved_last_stamp=${sched_data['query_last_stamp']} + ceph tell osd.* config set osd_scrub_sleep "0" + ceph pg deep-scrub $pgid + ceph pg scrub $pgid + + # wait for the 'last duration' entries to change. Note that the 'dump' one will need + # up to 5 seconds to sync + + sleep 5 + sched_data=() + declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" ) + wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sched_data || return 1 + # verify that 'pg dump' also shows the change in last_scrub_duration + sched_data=() + declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" ) + wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1 + + sleep 2 + + # + # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub + # scheduled for the future' value + # + + ceph tell osd.* config set osd_scrub_chunk_max "3" || return 1 + ceph tell osd.* config set osd_scrub_sleep "1.0" || return 1 + ceph osd set noscrub || return 1 + sleep 2 + saved_last_stamp=${sched_data['query_last_stamp']} + + ceph pg $pgid scrub + sleep 1 + sched_data=() + declare -A expct_scrub_peri_sched=( ['query_is_future']="false" ) + wait_any_cond $pgid 10 $saved_last_stamp expct_scrub_peri_sched "waitingBeingScheduled" sched_data || return 1 + + # note: the induced change in 'last_scrub_stamp' that we've caused above, is by itself not a publish-stats + # trigger. Thus it might happen that the information in 'pg dump' will not get updated here. Do not expect + # 'dmp_is_future' to follow 'query_is_future' without a good reason + ## declare -A expct_scrub_peri_sched_dmp=( ['dmp_is_future']="false" ) + ## wait_any_cond $pgid 15 $saved_last_stamp expct_scrub_peri_sched_dmp "waitingBeingScheduled" sched_data || echo "must be fixed" + + # + # step 3: allow scrubs. Watch for the conditions during the scrubbing + # + + saved_last_stamp=${sched_data['query_last_stamp']} + ceph osd unset noscrub + + declare -A cond_active=( ['query_active']="true" ) + sched_data=() + wait_any_cond $pgid 10 $saved_last_stamp cond_active "WaitingActive " sched_data || return 1 + + # check for pg-dump to show being active. But if we see 'query_active' being reset - we've just + # missed it. 
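+ # wait_any_cond() returns success as soon as either condition holds: 'pg dump' reports a scrubbing state, or the query shows query_active already back to "false" (i.e. the scrub finished before we sampled it).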
+ declare -A cond_active_dmp=( ['dmp_state_has_scrubbing']="true" ['query_active']="false" ) + sched_data=() + wait_any_cond $pgid 10 $saved_last_stamp cond_active_dmp "WaitingActive " sched_data || return 1 +} + +function TEST_pg_dump_objects_scrubbed() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + local timeout=10 + + TESTDATA="testdata.$$" + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local pgid="${poolid}.0" + #Trigger a scrub on a PG + pg_scrub $pgid || return 1 + test "$(ceph pg $pgid query | jq '.info.stats.objects_scrubbed')" '=' $objects || return 1 + + teardown $dir || return 1 +} + +main osd-scrub-test "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-test.sh" +# End: diff --git a/qa/standalone/scrub/osd-unexpected-clone.sh b/qa/standalone/scrub/osd-unexpected-clone.sh new file mode 100755 index 000000000..6895bfee6 --- /dev/null +++ b/qa/standalone/scrub/osd-unexpected-clone.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Intel <contact@intel.com.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Xiaoxi Chen <xiaoxi.chen@intel.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_recover_unexpected() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 1 + rados -p foo put foo /etc/passwd + rados -p foo mksnap snap + rados -p foo put foo /etc/group + + wait_for_clean || return 1 + + local osd=$(get_primary foo foo) + + JSON=`objectstore_tool $dir $osd --op list foo | grep snapid.:1` + echo "JSON is $JSON" + rm -f $dir/_ $dir/data + objectstore_tool $dir $osd "$JSON" get-attr _ > $dir/_ || return 1 + objectstore_tool $dir $osd "$JSON" get-bytes $dir/data || return 1 + + rados -p foo rmsnap snap + + sleep 5 + + objectstore_tool $dir $osd "$JSON" set-bytes $dir/data || return 1 + objectstore_tool $dir $osd "$JSON" set-attr _ $dir/_ || return 1 + + sleep 5 + + ceph pg repair 1.0 || return 1 + + sleep 10 + + ceph log last + + # make sure osds are still up + timeout 60 ceph tell osd.0 version || return 1 + timeout 60 ceph tell osd.1 version || return 1 + timeout 60 ceph tell osd.2 version || return 1 +} + + +main osd-unexpected-clone "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh new file mode 100644 index 000000000..6816d71de --- /dev/null +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -0,0 +1,302 @@ +#!/usr/bin/env bash +# @file scrub-helpers.sh +# @brief a collection of bash functions useful for scrub standalone tests +# + +# extract_published_sch() +# +# Use the output from both 'ceph pg dump pgs' and 'ceph pg x.x query' commands to determine +# the published scrub scheduling status of a given PG. +# +# $1: pg id +# $2: 'current' time to compare to +# $3: an additional time-point to compare to +# $4: [out] dictionary +# +function extract_published_sch() { + local pgn="$1" + local -n dict=$4 # a ref to the in/out dictionary + local current_time=$2 + local extra_time=$3 + local extr_dbg=1 # note: 3 and above leave some temp files around + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + (( extr_dbg >= 3 )) && ceph pg dump pgs -f json-pretty >> /tmp/a_dmp$$ + (( extr_dbg >= 3 )) && ceph pg $1 query -f json-pretty >> /tmp/a_qry$$ + + from_dmp=`ceph pg dump pgs -f json-pretty | jq -r --arg pgn "$pgn" --arg extra_dt "$extra_time" --arg current_dt "$current_time" '[ + [[.pg_stats[]] | group_by(.pg_stats)][0][0] | + [.[] | + select(has("pgid") and .pgid == $pgn) | + + (.dmp_stat_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|first) else . 
end)) | + (.dmp_when_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|last) else "0" end)) | + + [ { + dmp_pg_state: .state, + dmp_state_has_scrubbing: (.state | test(".*scrub.*";"i")), + dmp_last_duration:.last_scrub_duration, + dmp_schedule: .dmp_stat_part, + dmp_schedule_at: .dmp_when_part, + dmp_is_future: ( .dmp_when_part > $current_dt ), + dmp_vs_date: ( .dmp_when_part > $extra_dt ), + dmp_reported_epoch: .reported_epoch, + dmp_seq: .reported_seq + }] ]][][][]'` + + (( extr_dbg >= 2 )) && echo "from pg dump pg: $from_dmp" + (( extr_dbg >= 2 )) && echo "query output:" + (( extr_dbg >= 2 )) && ceph pg $1 query -f json-pretty | awk -e '/scrubber/,/agent_state/ {print;}' + + from_qry=`ceph pg $1 query -f json-pretty | jq -r --arg extra_dt "$extra_time" --arg current_dt "$current_time" --arg spt "'" ' + . | + (.q_stat_part=((.scrubber.schedule// "-") | if test(".*@.*") then (split(" @ ")|first) else . end)) | + (.q_when_part=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last) else "0" end)) | + (.q_when_is_future=(.q_when_part > $current_dt)) | + (.q_vs_date=(.q_when_part > $extra_dt)) | + { + query_epoch: .epoch, + query_seq: .info.stats.reported_seq, + query_active: (.scrubber | if has("active") then .active else "bug" end), + query_schedule: .q_stat_part, + query_schedule_at: .q_when_part, + query_last_duration: .info.stats.last_scrub_duration, + query_last_stamp: .info.history.last_scrub_stamp, + query_last_scrub: (.info.history.last_scrub| sub($spt;"x") ), + query_is_future: .q_when_is_future, + query_vs_date: .q_vs_date, + query_scrub_seq: .scrubber.test_sequence + } + '` + (( extr_dbg >= 1 )) && echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"' + + # note that using a ref to an associative array directly is tricky. Instead - we are copying: + local -A dict_src=`echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'` + dict=() + for k in "${!dict_src[@]}"; do dict[$k]=${dict_src[$k]}; done + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + +# query the PG, until any of the conditions in the 'expected' array are met +# +# A condition may be negated by an additional entry in the 'expected' array. 
Its +# form should be: +# key: the original key, with a "_neg" suffix; +# Value: not checked +# +# $1: pg id +# $2: max retries +# $3: a date to use in comparisons +# $4: set of K/V conditions +# $5: debug message +# $6: [out] the results array +function wait_any_cond() { + local pgid="$1" + local retries=$2 + local cmp_date=$3 + local -n ep=$4 + local -n out_array=$6 + local -A sc_data + local extr_dbg=2 + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + local now_is=`date -I"ns"` + (( extr_dbg >= 2 )) && echo "waiting for any condition ($5): pg:$pgid dt:$cmp_date ($retries retries)" + + for i in $(seq 1 $retries) + do + sleep 0.5 + extract_published_sch $pgid $now_is $cmp_date sc_data + (( extr_dbg >= 4 )) && echo "${sc_data['dmp_last_duration']}" + (( extr_dbg >= 4 )) && echo "----> loop: $i ~ ${sc_data['dmp_last_duration']} / " ${sc_data['query_vs_date']} " / ${sc_data['dmp_is_future']}" + (( extr_dbg >= 2 )) && echo "--> loop: $i ~ ${sc_data['query_active']} / ${sc_data['query_seq']} / ${sc_data['dmp_seq']} " \ + "/ ${sc_data['query_is_future']} / ${sc_data['query_last_stamp']} / ${sc_data['query_schedule']} %%% ${!ep[@]}" + + # perform schedule_against_expected(), but with slightly different out-messages behaviour + for k_ref in "${!ep[@]}" + do + (( extr_dbg >= 3 )) && echo "key is $k_ref" + # is this a real key, or just a negation flag for another key?? + [[ $k_ref =~ "_neg" ]] && continue + + local act_val=${sc_data[$k_ref]} + local exp_val=${ep[$k_ref]} + + # possible negation? look for a matching key + local neg_key="${k_ref}_neg" + (( extr_dbg >= 3 )) && echo "neg-key is $neg_key" + if [ -v 'ep[$neg_key]' ]; then + is_neg=1 + else + is_neg=0 + fi + + (( extr_dbg >= 1 )) && echo "key is $k_ref: negation:$is_neg # expected: $exp_val # in actual: $act_val" + is_eq=0 + [[ $exp_val == $act_val ]] && is_eq=1 + if (($is_eq ^ $is_neg)) + then + echo "$5 - '$k_ref' actual value ($act_val) matches expected ($exp_val) (negation: $is_neg)" + for k in "${!sc_data[@]}"; do out_array[$k]=${sc_data[$k]}; done + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 0 + fi + done + done + + echo "$5: wait_any_cond(): failure. Note: query-active=${sc_data['query_active']}" + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 1 +} + + +# schedule_against_expected() +# +# Compare the scrub scheduling state collected by extract_published_sch() to a set of expected values. +# All values are expected to match. +# +# $1: the published scheduling state +# $2: a set of conditions to verify +# $3: text to be echoed for a failed match +# +function schedule_against_expected() { + local -n dict=$1 # a ref to the published state + local -n ep=$2 # the expected results + local extr_dbg=1 + + # turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + (( extr_dbg >= 1 )) && echo "-- - comparing:" + for k_ref in "${!ep[@]}" + do + local act_val=${dict[$k_ref]} + local exp_val=${ep[$k_ref]} + (( extr_dbg >= 1 )) && echo "key is " $k_ref " expected: " $exp_val " in actual: " $act_val + if [[ $exp_val != $act_val ]] + then + echo "$3 - '$k_ref' actual value ($act_val) differs from expected ($exp_val)" + echo '####################################################^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^' + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 1 + fi + done + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 0 +} + + +# Start the cluster "nodes" and create a pool for testing. 
+# +# The OSDs are started with a set of parameters aimed in creating a repeatable +# and stable scrub sequence: +# - no scrub randomizations/backoffs +# - no autoscaler +# +# $1: the test directory +# $2: [in/out] an array of configuration values +# +# The function adds/updates the configuration dictionary with the name of the +# pool created, and its ID. +# +# Argument 2 might look like this: +# +# declare -A test_conf=( +# ['osds_num']="3" +# ['pgs_in_pool']="7" +# ['extras']="--extra1 --extra2" +# ['pool_name']="testpl" +# ) +function standard_scrub_cluster() { + local dir=$1 + local -n args=$2 + + local OSDS=${args['osds_num']:-"3"} + local pg_num=${args['pgs_in_pool']:-"8"} + local poolname="${args['pool_name']:-test}" + args['pool_name']=$poolname + local extra_pars=${args['extras']} + local debug_msg=${args['msg']:-"dbg"} + + # turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_backoff_ratio=0.0 \ + --osd_pool_default_pg_autoscale_mode=off \ + --osd_pg_stat_report_interval_max=1 \ + $extra_pars" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd $(echo $ceph_osd_args) || return 1 + done + + create_pool $poolname $pg_num $pg_num + wait_for_clean || return 1 + + # update the in/out 'args' with the ID of the new pool + sleep 1 + name_n_id=`ceph osd dump | awk '/^pool.*'$poolname'/ { gsub(/'"'"'/," ",$3); print $3," ", $2}'` + echo "standard_scrub_cluster: $debug_msg: test pool is $name_n_id" + args['pool_id']="${name_n_id##* }" + args['osd_args']=$ceph_osd_args + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + + +# Start the cluster "nodes" and create a pool for testing - wpq version. +# +# A variant of standard_scrub_cluster() that selects the wpq scheduler and sets a value to +# osd_scrub_sleep. To be used when the test is attempting to "catch" the scrubber during an +# ongoing scrub. +# +# See standard_scrub_cluster() for more details. +# +# $1: the test directory +# $2: [in/out] an array of configuration values +# $3: osd_scrub_sleep +# +# The function adds/updates the configuration dictionary with the name of the +# pool created, and its ID. +function standard_scrub_wpq_cluster() { + local dir=$1 + local -n conf=$2 + local osd_sleep=$3 + + conf['extras']=" --osd_op_queue=wpq --osd_scrub_sleep=$osd_sleep ${conf['extras']}" + + standard_scrub_cluster $dir conf || return 1 +} + + +# A debug flag is set for the PG specified, causing the 'pg query' command to display +# an additional 'scrub sessions counter' field. +# +# $1: PG id +# +function set_query_debug() { + local pgid=$1 + local prim_osd=`ceph pg dump pgs_brief | \ + awk -v pg="^$pgid" -n -e '$0 ~ pg { print(gensub(/[^0-9]*([0-9]+).*/,"\\\\1","g",$5)); }' ` + + echo "Setting scrub debug data. 
Primary for $pgid is $prim_osd" + CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.$prim_osd) \ + scrubdebug $pgid set sessions +} + diff --git a/qa/standalone/special/ceph_objectstore_tool.py b/qa/standalone/special/ceph_objectstore_tool.py new file mode 100755 index 000000000..98a2c8723 --- /dev/null +++ b/qa/standalone/special/ceph_objectstore_tool.py @@ -0,0 +1,2045 @@ +#!/usr/bin/python3 + +from subprocess import call, check_output, DEVNULL + +import filecmp +import os +import subprocess +import math +import time +import sys +import re +import logging +import json +import tempfile +import platform + +logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING, + datefmt="%FT%T") + + +def wait_for_health(): + print("Wait for health_ok...", end="") + tries = 0 + while call("{path}/ceph health 2> /dev/null | grep -v 'HEALTH_OK\|HEALTH_WARN' > /dev/null".format(path=CEPH_BIN), shell=True) == 0: + tries += 1 + if tries == 150: + raise Exception("Time exceeded to go to health") + time.sleep(1) + print("DONE") + + +def get_pool_id(name, nullfd): + cmd = "{path}/ceph osd pool stats {pool}".format(pool=name, path=CEPH_BIN).split() + # pool {pool} id # .... grab the 4 field + return check_output(cmd, stderr=nullfd).decode().split()[3] + + +# return a list of unique PGS given an osd subdirectory +def get_osd_pgs(SUBDIR, ID): + PGS = [] + if ID: + endhead = re.compile("{id}.*_head$".format(id=ID)) + DIR = os.path.join(SUBDIR, "current") + PGS += [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and (ID is None or endhead.match(f))] + PGS = [re.sub("_head", "", p) for p in PGS if "_head" in p] + return PGS + + +# return a sorted list of unique PGs given a directory +def get_pgs(DIR, ID): + OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + PGS = [] + for d in OSDS: + SUBDIR = os.path.join(DIR, d) + PGS += get_osd_pgs(SUBDIR, ID) + return sorted(set(PGS)) + + +# return a sorted list of PGS a subset of ALLPGS that contain objects with prefix specified +def get_objs(ALLPGS, prefix, DIR, ID): + OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + PGS = [] + for d in OSDS: + DIRL2 = os.path.join(DIR, d) + SUBDIR = os.path.join(DIRL2, "current") + for p in ALLPGS: + PGDIR = p + "_head" + if not os.path.isdir(os.path.join(SUBDIR, PGDIR)): + continue + FINALDIR = os.path.join(SUBDIR, PGDIR) + # See if there are any objects there + if any(f for f in [val for _, _, fl in os.walk(FINALDIR) for val in fl] if f.startswith(prefix)): + PGS += [p] + return sorted(set(PGS)) + + +# return a sorted list of OSDS which have data from a given PG +def get_osds(PG, DIR): + ALLOSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + OSDS = [] + for d in ALLOSDS: + DIRL2 = os.path.join(DIR, d) + SUBDIR = os.path.join(DIRL2, "current") + PGDIR = PG + "_head" + if not os.path.isdir(os.path.join(SUBDIR, PGDIR)): + continue + OSDS += [d] + return sorted(OSDS) + + +def get_lines(filename): + tmpfd = open(filename, "r") + line = True + lines = [] + while line: + line = tmpfd.readline().rstrip('\n') + if line: + lines += [line] + tmpfd.close() + os.unlink(filename) + return lines + + +def cat_file(level, filename): + if level < logging.getLogger().getEffectiveLevel(): + return + print("File: " + filename) + with open(filename, "r") as f: + while True: + line = f.readline().rstrip('\n') + if not line: + break + print(line) + print("<EOF>") + + +def 
vstart(new, opt="-o osd_pool_default_pg_autoscale_mode=off"): + print("vstarting....", end="") + NEW = new and "-n" or "-k" + call("MON=1 OSD=4 MDS=0 MGR=1 CEPH_PORT=7400 MGR_PYTHON_PATH={path}/src/pybind/mgr {path}/src/vstart.sh --filestore --short -l {new} -d {opt} > /dev/null 2>&1".format(new=NEW, opt=opt, path=CEPH_ROOT), shell=True) + print("DONE") + + +def test_failure(cmd, errmsg, tty=False): + if tty: + try: + ttyfd = open("/dev/tty", "rwb") + except Exception as e: + logging.info(str(e)) + logging.info("SKIP " + cmd) + return 0 + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + tmpfd = open(TMPFILE, "wb") + + logging.debug(cmd) + if tty: + ret = call(cmd, shell=True, stdin=ttyfd, stdout=ttyfd, stderr=tmpfd) + ttyfd.close() + else: + ret = call(cmd, shell=True, stderr=tmpfd) + tmpfd.close() + if ret == 0: + logging.error(cmd) + logging.error("Should have failed, but got exit 0") + return 1 + lines = get_lines(TMPFILE) + matched = [ l for l in lines if errmsg in l ] + if any(matched): + logging.info("Correctly failed with message \"" + matched[0] + "\"") + return 0 + else: + logging.error("Command: " + cmd ) + logging.error("Bad messages to stderr \"" + str(lines) + "\"") + logging.error("Expected \"" + errmsg + "\"") + return 1 + + +def get_nspace(num): + if num == 0: + return "" + return "ns{num}".format(num=num) + + +def verify(DATADIR, POOL, NAME_PREFIX, db): + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + ERRORS = 0 + for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(NAME_PREFIX) == 0]: + nsfile = rawnsfile.split("__")[0] + clone = rawnsfile.split("__")[1] + nspace = nsfile.split("-")[0] + file = nsfile.split("-")[1] + # Skip clones + if clone != "head": + continue + path = os.path.join(DATADIR, rawnsfile) + try: + os.unlink(TMPFILE) + except: + pass + cmd = "{path}/rados -p {pool} -N '{nspace}' get {file} {out}".format(pool=POOL, file=file, out=TMPFILE, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL) + cmd = "diff -q {src} {result}".format(src=path, result=TMPFILE) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("{file} data not imported properly".format(file=file)) + ERRORS += 1 + try: + os.unlink(TMPFILE) + except: + pass + for key, val in db[nspace][file]["xattr"].items(): + cmd = "{path}/rados -p {pool} -N '{nspace}' getxattr {name} {key}".format(pool=POOL, name=file, key=key, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + getval = check_output(cmd, shell=True, stderr=DEVNULL).decode() + logging.debug("getxattr {key} {val}".format(key=key, val=getval)) + if getval != val: + logging.error("getxattr of key {key} returned wrong val: {get} instead of {orig}".format(key=key, get=getval, orig=val)) + ERRORS += 1 + continue + hdr = db[nspace][file].get("omapheader", "") + cmd = "{path}/rados -p {pool} -N '{nspace}' getomapheader {name} {file}".format(pool=POOL, name=file, nspace=nspace, file=TMPFILE, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=DEVNULL) + if ret != 0: + logging.error("rados getomapheader returned {ret}".format(ret=ret)) + ERRORS += 1 + else: + getlines = get_lines(TMPFILE) + assert(len(getlines) == 0 or len(getlines) == 1) + if len(getlines) == 0: + gethdr = "" + else: + gethdr = getlines[0] + logging.debug("header: {hdr}".format(hdr=gethdr)) + if gethdr != hdr: + logging.error("getomapheader returned wrong val: {get} instead of {orig}".format(get=gethdr, orig=hdr)) + ERRORS += 1 + for key, val in 
db[nspace][file]["omap"].items(): + cmd = "{path}/rados -p {pool} -N '{nspace}' getomapval {name} {key} {file}".format(pool=POOL, name=file, key=key, nspace=nspace, file=TMPFILE, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=DEVNULL) + if ret != 0: + logging.error("getomapval returned {ret}".format(ret=ret)) + ERRORS += 1 + continue + getlines = get_lines(TMPFILE) + if len(getlines) != 1: + logging.error("Bad data from getomapval {lines}".format(lines=getlines)) + ERRORS += 1 + continue + getval = getlines[0] + logging.debug("getomapval {key} {val}".format(key=key, val=getval)) + if getval != val: + logging.error("getomapval returned wrong val: {get} instead of {orig}".format(get=getval, orig=val)) + ERRORS += 1 + try: + os.unlink(TMPFILE) + except: + pass + return ERRORS + + +def check_journal(jsondict): + errors = 0 + if 'header' not in jsondict: + logging.error("Key 'header' not in dump-journal") + errors += 1 + elif 'max_size' not in jsondict['header']: + logging.error("Key 'max_size' not in dump-journal header") + errors += 1 + else: + print("\tJournal max_size = {size}".format(size=jsondict['header']['max_size'])) + if 'entries' not in jsondict: + logging.error("Key 'entries' not in dump-journal output") + errors += 1 + elif len(jsondict['entries']) == 0: + logging.info("No entries in journal found") + else: + errors += check_journal_entries(jsondict['entries']) + return errors + + +def check_journal_entries(entries): + errors = 0 + for enum in range(len(entries)): + if 'offset' not in entries[enum]: + logging.error("No 'offset' key in entry {e}".format(e=enum)) + errors += 1 + if 'seq' not in entries[enum]: + logging.error("No 'seq' key in entry {e}".format(e=enum)) + errors += 1 + if 'transactions' not in entries[enum]: + logging.error("No 'transactions' key in entry {e}".format(e=enum)) + errors += 1 + elif len(entries[enum]['transactions']) == 0: + logging.error("No transactions found in entry {e}".format(e=enum)) + errors += 1 + else: + errors += check_entry_transactions(entries[enum], enum) + return errors + + +def check_entry_transactions(entry, enum): + errors = 0 + for tnum in range(len(entry['transactions'])): + if 'trans_num' not in entry['transactions'][tnum]: + logging.error("Key 'trans_num' missing from entry {e} trans {t}".format(e=enum, t=tnum)) + errors += 1 + elif entry['transactions'][tnum]['trans_num'] != tnum: + ft = entry['transactions'][tnum]['trans_num'] + logging.error("Bad trans_num ({ft}) entry {e} trans {t}".format(ft=ft, e=enum, t=tnum)) + errors += 1 + if 'ops' not in entry['transactions'][tnum]: + logging.error("Key 'ops' missing from entry {e} trans {t}".format(e=enum, t=tnum)) + errors += 1 + else: + errors += check_transaction_ops(entry['transactions'][tnum]['ops'], enum, tnum) + return errors + + +def check_transaction_ops(ops, enum, tnum): + if len(ops) == 0: + logging.warning("No ops found in entry {e} trans {t}".format(e=enum, t=tnum)) + errors = 0 + for onum in range(len(ops)): + if 'op_num' not in ops[onum]: + logging.error("Key 'op_num' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum)) + errors += 1 + elif ops[onum]['op_num'] != onum: + fo = ops[onum]['op_num'] + logging.error("Bad op_num ({fo}) from entry {e} trans {t} op {o}".format(fo=fo, e=enum, t=tnum, o=onum)) + errors += 1 + if 'op_name' not in ops[onum]: + logging.error("Key 'op_name' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum)) + errors += 1 + return errors + + +def test_dump_journal(CFSD_PREFIX, osds): + 
ERRORS = 0 + pid = os.getpid() + TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid) + + for osd in osds: + # Test --op dump-journal by loading json + cmd = (CFSD_PREFIX + "--op dump-journal --format json").format(osd=osd) + logging.debug(cmd) + tmpfd = open(TMPFILE, "wb") + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + continue + tmpfd.close() + tmpfd = open(TMPFILE, "r") + jsondict = json.load(tmpfd) + tmpfd.close() + os.unlink(TMPFILE) + + journal_errors = check_journal(jsondict) + if journal_errors != 0: + logging.error(jsondict) + ERRORS += journal_errors + + return ERRORS + +CEPH_BUILD_DIR = os.environ.get('CEPH_BUILD_DIR') +CEPH_BIN = os.environ.get('CEPH_BIN') +CEPH_ROOT = os.environ.get('CEPH_ROOT') + +if not CEPH_BUILD_DIR: + CEPH_BUILD_DIR=os.getcwd() + os.putenv('CEPH_BUILD_DIR', CEPH_BUILD_DIR) + CEPH_BIN=os.path.join(CEPH_BUILD_DIR, 'bin') + os.putenv('CEPH_BIN', CEPH_BIN) + CEPH_ROOT=os.path.dirname(CEPH_BUILD_DIR) + os.putenv('CEPH_ROOT', CEPH_ROOT) + CEPH_LIB=os.path.join(CEPH_BUILD_DIR, 'lib') + os.putenv('CEPH_LIB', CEPH_LIB) + +try: + os.mkdir("td") +except: + pass # ok if this is already there +CEPH_DIR = os.path.join(CEPH_BUILD_DIR, os.path.join("td", "cot_dir")) +CEPH_CONF = os.path.join(CEPH_DIR, 'ceph.conf') + +def kill_daemons(): + call("{path}/init-ceph -c {conf} stop > /dev/null 2>&1".format(conf=CEPH_CONF, path=CEPH_BIN), shell=True) + + +def check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME): + repcount = 0 + ERRORS = 0 + for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(SPLIT_NAME) == 0]: + nsfile = rawnsfile.split("__")[0] + clone = rawnsfile.split("__")[1] + nspace = nsfile.split("-")[0] + file = nsfile.split("-")[1] + "__" + clone + # Skip clones + if clone != "head": + continue + path = os.path.join(DATADIR, rawnsfile) + tmpfd = open(TMPFILE, "wb") + cmd = "find {dir} -name '{file}_*_{nspace}_*'".format(dir=OSDDIR, file=file, nspace=nspace) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret: + logging.critical("INTERNAL ERROR") + return 1 + tmpfd.close() + obj_locs = get_lines(TMPFILE) + if len(obj_locs) == 0: + logging.error("Can't find imported object {name}".format(name=file)) + ERRORS += 1 + for obj_loc in obj_locs: + # For btrfs skip snap_* dirs + if re.search("/snap_[0-9]*/", obj_loc) is not None: + continue + repcount += 1 + cmd = "diff -q {src} {obj_loc}".format(src=path, obj_loc=obj_loc) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("{file} data not imported properly into {obj}".format(file=file, obj=obj_loc)) + ERRORS += 1 + return ERRORS, repcount + + +def set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight): + # change the weight of osd.0 to math.pi in the newest osdmap of given osd + osdmap_file = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path, + osdmap_file=osdmap_file.name) + output = check_output(cmd, shell=True).decode() + epoch = int(re.findall('#(\d+)', output)[0]) + + new_crush_file = tempfile.NamedTemporaryFile(delete=True) + old_crush_file = tempfile.NamedTemporaryFile(delete=True) + ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=old_crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + stderr=DEVNULL, + shell=True) + assert(ret == 0) + + for osd_id in osd_ids: + cmd = "{path}/crushtool -i {crush_file} 
--reweight-item osd.{osd} {weight} -o {new_crush_file}".format(osd=osd_id, + crush_file=old_crush_file.name, + weight=weight, + new_crush_file=new_crush_file.name, path=CEPH_BIN) + ret = call(cmd, stdout=DEVNULL, shell=True) + assert(ret == 0) + old_crush_file, new_crush_file = new_crush_file, old_crush_file + + # change them back, since we don't need to prepare for another round + old_crush_file, new_crush_file = new_crush_file, old_crush_file + old_crush_file.close() + + ret = call("{path}/osdmaptool --import-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=new_crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + stderr=DEVNULL, + shell=True) + assert(ret == 0) + + # Minimal test of --dry-run by using it, but not checking anything + cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force --dry-run" + cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch) + ret = call(cmd, stdout=DEVNULL, shell=True) + assert(ret == 0) + + # osdmaptool increases the epoch of the changed osdmap, so we need to force the tool + # to use a different epoch than the one in the osdmap + cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force" + cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch) + ret = call(cmd, stdout=DEVNULL, shell=True) + + return ret == 0 + +def get_osd_weights(CFSD_PREFIX, osd_ids, osd_path): + osdmap_file = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path, + osdmap_file=osdmap_file.name) + ret = call(cmd, stdout=DEVNULL, shell=True) + if ret != 0: + return None + # we have to read the weights from the crush map, even though we can query the weights using + # osdmaptool; keep in mind that they are different: + # item weights in crush map versus weight associated with each osd in osdmap + crush_file = tempfile.NamedTemporaryFile(delete=True) + ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + shell=True) + assert(ret == 0) + output = check_output("{path}/crushtool --tree -i {crush_file} | tail -n {num_osd}".format(crush_file=crush_file.name, + num_osd=len(osd_ids), path=CEPH_BIN), + stderr=DEVNULL, + shell=True).decode() + weights = [] + for line in output.strip().split('\n'): + print(line) + linev = re.split('\s+', line) + if linev[0] == '': + linev.pop(0) + print('linev %s' % linev) + weights.append(float(linev[2])) + + return weights + + +def test_get_set_osdmap(CFSD_PREFIX, osd_ids, osd_paths): + print("Testing get-osdmap and set-osdmap") + errors = 0 + kill_daemons() + weight = 1 / math.e # just some magic number in [0, 1] + changed = [] + for osd_path in osd_paths: + if set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight): + changed.append(osd_path) + else: + logging.warning("Failed to change the weights: {0}".format(osd_path)) + # it is an error if none of the stores was changed + if not changed: + errors += 1 + + for osd_path in changed: + weights = get_osd_weights(CFSD_PREFIX, osd_ids, osd_path) + if not weights: + errors += 1 + continue + if any(abs(w - weight) > 1e-5 for w in weights): + logging.warning("Weight is not changed: {0} != {1}".format(weights, weight)) + errors += 1 + return errors + +def test_get_set_inc_osdmap(CFSD_PREFIX, osd_path): + # incrementals are not used unless we need to build an MOSDMap to update + # OSD's peers, so an obvious way 
to test it is simply overwrite an epoch + # with a different copy, and read it back to see if it matches. + kill_daemons() + file_e2 = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-inc-osdmap --file {file}").format(osd=osd_path, + file=file_e2.name) + output = check_output(cmd, shell=True).decode() + epoch = int(re.findall('#(\d+)', output)[0]) + # backup e1 incremental before overwriting it + epoch -= 1 + file_e1_backup = tempfile.NamedTemporaryFile(delete=True) + cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: return 1 + # overwrite e1 with e2 + cmd = CFSD_PREFIX + "--op set-inc-osdmap --force --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e2.name), shell=True) + if ret: return 1 + # Use dry-run to set back to e1 which shouldn't happen + cmd = CFSD_PREFIX + "--op set-inc-osdmap --dry-run --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: return 1 + # read from e1 + file_e1_read = tempfile.NamedTemporaryFile(delete=True) + cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_read.name), shell=True) + if ret: return 1 + errors = 0 + try: + if not filecmp.cmp(file_e2.name, file_e1_read.name, shallow=False): + logging.error("{{get,set}}-inc-osdmap mismatch {0} != {1}".format(file_e2.name, file_e1_read.name)) + errors += 1 + finally: + # revert the change with file_e1_backup + cmd = CFSD_PREFIX + "--op set-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: + logging.error("Failed to revert the changed inc-osdmap") + errors += 1 + + return errors + + +def test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS): + # Test removeall + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + nullfd = open(os.devnull, "w") + errors=0 + print("Test removeall") + kill_daemons() + test_force_remove = 0 + for nspace in db.keys(): + for basename in db[nspace].keys(): + JSON = db[nspace][basename]['json'] + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + + if int(basename.split(REP_NAME)[1]) <= int(NUM_CLONED_REP_OBJECTS): + cmd = (CFSD_PREFIX + "'{json}' remove").format(osd=osd, json=JSON) + errors += test_failure(cmd, "Clones are present, use removeall to delete everything") + if not test_force_remove: + + cmd = (CFSD_PREFIX + " '{json}' set-attr snapset /dev/null").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Test set-up to corrupt snapset failed for {json}".format(json=JSON)) + errors += 1 + # Do the removeall since this test failed to set-up + else: + test_force_remove = 1 + + cmd = (CFSD_PREFIX + " '{json}' --force remove").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("forced remove with corrupt snapset failed for 
{json}".format(json=JSON)) + errors += 1 + continue + + cmd = (CFSD_PREFIX + " --force --dry-run '{json}' remove").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("remove with --force failed for {json}".format(json=JSON)) + errors += 1 + + cmd = (CFSD_PREFIX + " --dry-run '{json}' removeall").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("removeall failed for {json}".format(json=JSON)) + errors += 1 + + cmd = (CFSD_PREFIX + " '{json}' removeall").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("removeall failed for {json}".format(json=JSON)) + errors += 1 + + tmpfd = open(TMPFILE, "w") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} --namespace {ns} {name}").format(osd=osd, pg=pg, ns=nspace, name=basename) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + errors += 1 + tmpfd.close() + lines = get_lines(TMPFILE) + if len(lines) != 0: + logging.error("Removeall didn't remove all objects {ns}/{name} : {lines}".format(ns=nspace, name=basename, lines=lines)) + errors += 1 + vstart(new=False) + wait_for_health() + cmd = "{path}/rados -p {pool} rmsnap snap1".format(pool=REP_POOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("rados rmsnap failed") + errors += 1 + time.sleep(2) + wait_for_health() + return errors + + +def main(argv): + stdout = sys.stdout.buffer + if len(argv) > 1 and argv[1] == "debug": + nullfd = stdout + else: + nullfd = DEVNULL + + call("rm -fr {dir}; mkdir -p {dir}".format(dir=CEPH_DIR), shell=True) + os.chdir(CEPH_DIR) + os.environ["CEPH_DIR"] = CEPH_DIR + OSDDIR = "dev" + REP_POOL = "rep_pool" + REP_NAME = "REPobject" + EC_POOL = "ec_pool" + EC_NAME = "ECobject" + if len(argv) > 0 and argv[0] == 'large': + PG_COUNT = 12 + NUM_REP_OBJECTS = 200 + NUM_CLONED_REP_OBJECTS = 50 + NUM_EC_OBJECTS = 12 + NUM_NSPACES = 4 + # Larger data sets for first object per namespace + DATALINECOUNT = 50000 + # Number of objects to do xattr/omap testing on + ATTR_OBJS = 10 + else: + PG_COUNT = 4 + NUM_REP_OBJECTS = 2 + NUM_CLONED_REP_OBJECTS = 2 + NUM_EC_OBJECTS = 2 + NUM_NSPACES = 2 + # Larger data sets for first object per namespace + DATALINECOUNT = 10 + # Number of objects to do xattr/omap testing on + ATTR_OBJS = 2 + ERRORS = 0 + pid = os.getpid() + TESTDIR = "/tmp/test.{pid}".format(pid=pid) + DATADIR = "/tmp/data.{pid}".format(pid=pid) + CFSD_PREFIX = CEPH_BIN + "/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} " + PROFNAME = "testecprofile" + + os.environ['CEPH_CONF'] = CEPH_CONF + vstart(new=True) + wait_for_health() + + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=REP_POOL, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + time.sleep(2) + REPID = get_pool_id(REP_POOL, nullfd) + + print("Created Replicated pool #{repid}".format(repid=REPID)) + + cmd = "{path}/ceph osd erasure-code-profile set {prof} crush-failure-domain=osd".format(prof=PROFNAME, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + cmd = "{path}/ceph osd erasure-code-profile get {prof}".format(prof=PROFNAME, 
path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} erasure {prof}".format(pool=EC_POOL, prof=PROFNAME, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + ECID = get_pool_id(EC_POOL, nullfd) + + print("Created Erasure coded pool #{ecid}".format(ecid=ECID)) + + print("Creating {objs} objects in replicated pool".format(objs=(NUM_REP_OBJECTS*NUM_NSPACES))) + cmd = "mkdir -p {datadir}".format(datadir=DATADIR) + logging.debug(cmd) + call(cmd, shell=True) + + db = {} + + objects = range(1, NUM_REP_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + db[nspace] = {} + + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the replicated data for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + db[nspace][NAME] = {} + + if i < ATTR_OBJS + 1: + keys = range(i) + else: + keys = range(0) + db[nspace][NAME]["xattr"] = {} + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["xattr"][mykey] = myval + + # Create omap header in all objects but REPobject1 + if i < ATTR_OBJS + 1 and i != 1: + myhdr = "hdr{i}".format(i=i) + cmd = "{path}/rados -p {pool} -N '{nspace}' setomapheader {name} {hdr}".format(pool=REP_POOL, name=NAME, hdr=myhdr, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.critical("setomapheader failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["omapheader"] = myhdr + + db[nspace][NAME]["omap"] = {} + for k in keys: + if k == 0: + continue + mykey = "okey{i}-{k}".format(i=i, k=k) + myval = "oval{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setomapval {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.critical("setomapval failed with {ret}".format(ret=ret)) + db[nspace][NAME]["omap"][mykey] = myval + + # Create some clones + cmd = "{path}/rados -p {pool} mksnap snap1".format(pool=REP_POOL, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True) + + objects = range(1, NUM_CLONED_REP_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + # First clone + CLONENAME = DDNAME + "__1" + DDNAME += "__head" + + cmd = "mv -f " 
+ DDNAME + " " + CLONENAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the replicated data after a snapshot for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + print("Creating {objs} objects in erasure coded pool".format(objs=(NUM_EC_OBJECTS*NUM_NSPACES))) + + objects = range(1, NUM_EC_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = EC_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the erasure coded data for " + LNAME + "\n" + for j in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=EC_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Erasure coded pool creation failed with {ret}".format(ret=ret)) + return 1 + + db[nspace][NAME] = {} + + db[nspace][NAME]["xattr"] = {} + if i < ATTR_OBJS + 1: + keys = range(i) + else: + keys = range(0) + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=EC_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["xattr"][mykey] = myval + + # Omap isn't supported in EC pools + db[nspace][NAME]["omap"] = {} + + logging.debug(db) + + kill_daemons() + + if ERRORS: + logging.critical("Unable to set up test") + return 1 + + ALLREPPGS = get_pgs(OSDDIR, REPID) + logging.debug(ALLREPPGS) + ALLECPGS = get_pgs(OSDDIR, ECID) + logging.debug(ALLECPGS) + + OBJREPPGS = get_objs(ALLREPPGS, REP_NAME, OSDDIR, REPID) + logging.debug(OBJREPPGS) + OBJECPGS = get_objs(ALLECPGS, EC_NAME, OSDDIR, ECID) + logging.debug(OBJECPGS) + + ONEPG = ALLREPPGS[0] + logging.debug(ONEPG) + osds = get_osds(ONEPG, OSDDIR) + ONEOSD = osds[0] + logging.debug(ONEOSD) + + print("Test invalid parameters") + # On export can't use stdout to a terminal + cmd = (CFSD_PREFIX + "--op export --pgid {pg}").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True) + + # On export can't use stdout to a terminal + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True) + + # Prep a valid ec export file for import failure tests + ONEECPG = ALLECPGS[0] + osds = get_osds(ONEECPG, OSDDIR) + ONEECOSD = osds[0] + OTHERFILE = "/tmp/foo.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEECOSD, 
pg=ONEECPG, file=OTHERFILE) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + os.unlink(OTHERFILE) + + # Prep a valid export file for import failure tests + OTHERFILE = "/tmp/foo.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEOSD, pg=ONEPG, file=OTHERFILE) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + # On import can't specify a different pgid than the file + TMPPG="{pool}.80".format(pool=REPID) + cmd = (CFSD_PREFIX + "--op import --pgid 12.dd --file {file}").format(osd=ONEOSD, pg=TMPPG, file=OTHERFILE) + ERRORS += test_failure(cmd, "specified pgid 12.dd does not match actual pgid") + + os.unlink(OTHERFILE) + cmd = (CFSD_PREFIX + "--op import --file {FOO}").format(osd=ONEOSD, FOO=OTHERFILE) + ERRORS += test_failure(cmd, "file: {FOO}: No such file or directory".format(FOO=OTHERFILE)) + + cmd = "{path}/ceph-objectstore-tool --no-mon-config --data-path BAD_DATA_PATH --op list".format(path=CEPH_BIN) + ERRORS += test_failure(cmd, "data-path: BAD_DATA_PATH: No such file or directory") + + cmd = (CFSD_PREFIX + "--journal-path BAD_JOURNAL_PATH --op list").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "journal-path: BAD_JOURNAL_PATH: No such file or directory") + + cmd = (CFSD_PREFIX + "--journal-path /bin --op list").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "journal-path: /bin: (21) Is a directory") + + # On import can't use stdin from a terminal + cmd = (CFSD_PREFIX + "--op import --pgid {pg}").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True) + + # On import can't use stdin from a terminal + cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True) + + # Specify a bad --type + os.mkdir(OSDDIR + "/fakeosd") + cmd = ("{path}/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} --type foobar --op list --pgid {pg}").format(osd="fakeosd", pg=ONEPG, path=CEPH_BIN) + ERRORS += test_failure(cmd, "Unable to create store of type foobar") + + # Don't specify a data-path + cmd = "{path}/ceph-objectstore-tool --no-mon-config --type memstore --op list --pgid {pg}".format(pg=ONEPG, path=CEPH_BIN) + ERRORS += test_failure(cmd, "Must provide --data-path") + + cmd = (CFSD_PREFIX + "--op remove --pgid 2.0").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Please use export-remove or you must use --force option") + + cmd = (CFSD_PREFIX + "--force --op remove").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide pgid") + + # Don't secify a --op nor object command + cmd = CFSD_PREFIX.format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide --op or object command...") + + # Specify a bad --op command + cmd = (CFSD_PREFIX + "--op oops").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)") + + # Provide just the object param not a command + cmd = (CFSD_PREFIX + "object").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Invalid syntax, missing command") + + # Provide an object name that doesn't exist + cmd = (CFSD_PREFIX + "NON_OBJECT get-bytes").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "No 
object id 'NON_OBJECT' found") + + # Provide an invalid object command + cmd = (CFSD_PREFIX + "--pgid {pg} '' notacommand").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Unknown object command 'notacommand'") + + cmd = (CFSD_PREFIX + "foo list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "No object id 'foo' found or invalid JSON specified") + + cmd = (CFSD_PREFIX + "'{{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}}' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Without --pgid the object '{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}' must be a JSON array") + + cmd = (CFSD_PREFIX + "'[]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[\"1.0\"]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[\"1.0\"]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[\"1.0\", 5, 8, 9]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[\"1.0\", 5, 8, 9]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[1, 2]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[1, 2]' must be a JSON array with the first element a string") + + cmd = (CFSD_PREFIX + "'[\"1.3\",{{\"snapid\":\"not an int\"}}]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Decode object JSON error: value type is 2 not 4") + + TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid) + ALLPGS = OBJREPPGS + OBJECPGS + OSDS = get_osds(ALLPGS[0], OSDDIR) + osd = OSDS[0] + + print("Test all --op dump-journal") + ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0] + ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS) + + # Test --op list and generate json for all objects + print("Test --op list variants") + + # retrieve all objects from all PGs + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op list --format json").format(osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + (pgid, coll, jsondict) = json.loads(JSONOBJ[0])[0] + + # retrieve all objects in a given PG + tmpfd = open(OTHERFILE, "ab") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} --format json").format(osd=osd, pg=pgid) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + tmpfd.close() + lines = get_lines(OTHERFILE) + JSONOBJ = sorted(set(lines)) + (other_pgid, other_coll, other_jsondict) = json.loads(JSONOBJ[0])[0] + + if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll: + logging.error("the first line of --op list is different " + "from the first line of --op list --pgid {pg}".format(pg=pgid)) + ERRORS += 1 + + # retrieve all objects with a given name in a given PG + tmpfd = open(OTHERFILE, "wb") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} {object} --format json").format(osd=osd, pg=pgid, object=jsondict['oid']) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS 
+= 1 + tmpfd.close() + lines = get_lines(OTHERFILE) + JSONOBJ = sorted(set(lines)) + (other_pgid, other_coll, other_jsondict) = json.loads(JSONOBJ[0])[0] + + if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll: + logging.error("the first line of --op list is different " + "from the first line of --op list --pgid {pg} {object}".format(pg=pgid, object=jsondict['oid'])) + ERRORS += 1 + + print("Test --op list by generating json for all objects using default format") + for pg in ALLPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + tmpfd = open(TMPFILE, "ab") + cmd = (CFSD_PREFIX + "--op list --pgid {pg}").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from --op list request".format(ret=ret)) + ERRORS += 1 + + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + for JSON in JSONOBJ: + (pgid, jsondict) = json.loads(JSON) + # Skip clones for now + if jsondict['snapid'] != -2: + continue + db[jsondict['namespace']][jsondict['oid']]['json'] = json.dumps((pgid, jsondict)) + # print db[jsondict['namespace']][jsondict['oid']]['json'] + if jsondict['oid'].find(EC_NAME) == 0 and 'shard_id' not in jsondict: + logging.error("Malformed JSON {json}".format(json=JSON)) + ERRORS += 1 + + # Test get-bytes + print("Test get-bytes and set-bytes") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + GETNAME = "/tmp/getbytes.{pid}".format(pid=pid) + TESTNAME = "/tmp/testbytes.{pid}".format(pid=pid) + SETNAME = "/tmp/setbytes.{pid}".format(pid=pid) + BADNAME = "/tmp/badbytes.{pid}".format(pid=pid) + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + try: + os.unlink(GETNAME) + except: + pass + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-bytes {fname}").format(osd=osd, pg=pg, json=JSON, fname=GETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret}".format(ret=ret)) + ERRORS += 1 + continue + cmd = "diff -q {file} {getfile}".format(file=file, getfile=GETNAME) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Data from get-bytes differ") + logging.debug("Got:") + cat_file(logging.DEBUG, GETNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, file) + ERRORS += 1 + fd = open(SETNAME, "w") + data = "put-bytes going into {file}\n".format(file=file) + fd.write(data) + fd.close() + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=SETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes".format(ret=ret)) + ERRORS += 1 + fd = open(TESTNAME, "wb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=fd) + fd.close() + if ret != 0: + logging.error("Bad exit status {ret} from get-bytes".format(ret=ret)) + ERRORS += 1 + cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + 
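+                        # the diff compares what set-bytes wrote (SETNAME) with what get-bytes read back (TESTNAME); any difference is an error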
logging.error("Data after set-bytes differ") + logging.debug("Got:") + cat_file(logging.DEBUG, TESTNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, SETNAME) + ERRORS += 1 + + # Use set-bytes with --dry-run and make sure contents haven't changed + fd = open(BADNAME, "w") + data = "Bad data for --dry-run in {file}\n".format(file=file) + fd.write(data) + fd.close() + cmd = (CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=BADNAME) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes --dry-run".format(ret=ret)) + ERRORS += 1 + fd = open(TESTNAME, "wb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=fd) + fd.close() + if ret != 0: + logging.error("Bad exit status {ret} from get-bytes".format(ret=ret)) + ERRORS += 1 + cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Data after set-bytes --dry-run changed!") + logging.debug("Got:") + cat_file(logging.DEBUG, TESTNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, SETNAME) + ERRORS += 1 + + fd = open(file, "rb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdin=fd) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes to restore object".format(ret=ret)) + ERRORS += 1 + fd.close() + + try: + os.unlink(GETNAME) + except: + pass + try: + os.unlink(TESTNAME) + except: + pass + try: + os.unlink(SETNAME) + except: + pass + try: + os.unlink(BADNAME) + except: + pass + + # Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap + print("Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + for key, val in db[nspace][basename]["xattr"].items(): + attrkey = "_" + key + cmd = (CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key=attrkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if getval != val: + logging.error("get-attr of key {key} returned wrong val: {get} instead of {orig}".format(key=attrkey, get=getval, orig=val)) + ERRORS += 1 + continue + # set-attr to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + # Test set-attr with dry-run + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit 
status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-attr + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-attr".format(ret=ret)) + ERRORS += 1 + continue + if getval != "foobar": + logging.error("Check of set-attr failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test rm-attr + cmd = (CFSD_PREFIX + "'{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from rm-attr".format(ret=ret)) + ERRORS += 1 + continue + # Check rm-attr with dry-run + cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from rm-attr".format(ret=ret)) + ERRORS += 1 + continue + cmd = (CFSD_PREFIX + "'{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd) + if ret == 0: + logging.error("For rm-attr expect get-attr to fail, but it succeeded") + ERRORS += 1 + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey, val=val) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + + hdr = db[nspace][basename].get("omapheader", "") + cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, json=JSON) + logging.debug(cmd) + gethdr = check_output(cmd, shell=True).decode() + if gethdr != hdr: + logging.error("get-omaphdr was wrong: {get} instead of {orig}".format(get=gethdr, orig=hdr)) + ERRORS += 1 + continue + # set-omaphdr to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-omaphdr + cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + gethdr = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + if gethdr != "foobar": + logging.error("Check of set-omaphdr failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test dry-run with set-omaphdr + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON, val=hdr) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + + for omapkey, val in db[nspace][basename]["omap"].items(): + cmd = (CFSD_PREFIX + " '{json}' get-omap {key}").format(osd=osd, json=JSON, 
key=omapkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if getval != val: + logging.error("get-omap of key {key} returned wrong val: {get} instead of {orig}".format(key=omapkey, get=getval, orig=val)) + ERRORS += 1 + continue + # set-omap to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + # Check set-omap with dry-run + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-omap + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-omap".format(ret=ret)) + ERRORS += 1 + continue + if getval != "foobar": + logging.error("Check of set-omap failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test rm-omap + cmd = (CFSD_PREFIX + "'{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from rm-omap".format(ret=ret)) + ERRORS += 1 + # Check rm-omap with dry-run + cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from rm-omap".format(ret=ret)) + ERRORS += 1 + cmd = (CFSD_PREFIX + "'{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd) + if ret == 0: + logging.error("For rm-omap expect get-omap to fail, but it succeeded") + ERRORS += 1 + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey, val=val) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + + # Test dump + print("Test dump") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + jsondict = json.loads(JSON) + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + if int(basename.split(REP_NAME)[1]) > int(NUM_CLONED_REP_OBJECTS): + continue + logging.debug("REPobject " + JSON) + cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"snap\": 1,' > /dev/null").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Invalid dump for {json}".format(json=JSON)) + ERRORS += 1 + if 'shard_id' in jsondict[1]: + 
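+                        # erasure-coded objects carry a shard_id; re-check the dump against every EC PG, fixing up shard_id per shard below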
logging.debug("ECobject " + JSON) + for pg in OBJECPGS: + OSDS = get_osds(pg, OSDDIR) + jsondict = json.loads(JSON) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + if int(basename.split(EC_NAME)[1]) > int(NUM_EC_OBJECTS): + continue + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"hinfo\": [{{]' > /dev/null").format(osd=osd, json=json.dumps((pg, jsondict[1]))) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Invalid dump for {json}".format(json=JSON)) + + print("Test list-attrs get-attr") + ATTRFILE = r"/tmp/attrs.{pid}".format(pid=pid) + VALFILE = r"/tmp/val.{pid}".format(pid=pid) + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename) + JSON = db[nspace][basename]['json'] + jsondict = json.loads(JSON) + + if 'shard_id' in jsondict[1]: + logging.debug("ECobject " + JSON) + found = 0 + for pg in OBJECPGS: + OSDS = get_osds(pg, OSDDIR) + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) + for osd in OSDS: + cmd = (CFSD_PREFIX + " --tty '{json}' get-attr hinfo_key").format(osd=osd, json=JSON) + logging.debug("TRY: " + cmd) + try: + out = check_output(cmd, shell=True, stderr=subprocess.STDOUT).decode() + logging.debug("FOUND: {json} in {osd} has value '{val}'".format(osd=osd, json=JSON, val=out)) + found += 1 + except subprocess.CalledProcessError as e: + logging.debug("Error message: {output}".format(output=e.output)) + if "No such file or directory" not in str(e.output) and \ + "No data available" not in str(e.output) and \ + "not contained by pg" not in str(e.output): + raise + # Assuming k=2 m=1 for the default ec pool + if found != 3: + logging.error("{json} hinfo_key found {found} times instead of 3".format(json=JSON, found=found)) + ERRORS += 1 + + for pg in ALLPGS: + # Make sure rep obj with rep pg or ec obj with ec pg + if ('shard_id' in jsondict[1]) != (pg.find('s') > 0): + continue + if 'shard_id' in jsondict[1]: + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + afd = open(ATTRFILE, "wb") + cmd = (CFSD_PREFIX + " '{json}' list-attrs").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=afd) + afd.close() + if ret != 0: + logging.error("list-attrs failed with {ret}".format(ret=ret)) + ERRORS += 1 + continue + keys = get_lines(ATTRFILE) + values = dict(db[nspace][basename]["xattr"]) + for key in keys: + if key == "_" or key == "snapset" or key == "hinfo_key": + continue + key = key.strip("_") + if key not in values: + logging.error("Unexpected key {key} present".format(key=key)) + ERRORS += 1 + continue + exp = values.pop(key) + vfd = open(VALFILE, "wb") + cmd = 
(CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key="_" + key) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=vfd) + vfd.close() + if ret != 0: + logging.error("get-attr failed with {ret}".format(ret=ret)) + ERRORS += 1 + continue + lines = get_lines(VALFILE) + val = lines[0] + if exp != val: + logging.error("For key {key} got value {got} instead of {expected}".format(key=key, got=val, expected=exp)) + ERRORS += 1 + if len(values) != 0: + logging.error("Not all keys found, remaining keys:") + print(values) + + print("Test --op meta-list") + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op meta-list").format(osd=ONEOSD) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from --op meta-list request".format(ret=ret)) + ERRORS += 1 + + print("Test get-bytes on meta") + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + for JSON in JSONOBJ: + (pgid, jsondict) = json.loads(JSON) + if pgid != "meta": + logging.error("pgid incorrect for --op meta-list {pgid}".format(pgid=pgid)) + ERRORS += 1 + if jsondict['namespace'] != "": + logging.error("namespace non null --op meta-list {ns}".format(ns=jsondict['namespace'])) + ERRORS += 1 + logging.info(JSON) + try: + os.unlink(GETNAME) + except: + pass + cmd = (CFSD_PREFIX + "'{json}' get-bytes {fname}").format(osd=ONEOSD, json=JSON, fname=GETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret}".format(ret=ret)) + ERRORS += 1 + + try: + os.unlink(GETNAME) + except: + pass + try: + os.unlink(TESTNAME) + except: + pass + + print("Test pg info") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + cmd = (CFSD_PREFIX + "--op info --pgid {pg} | grep '\"pgid\": \"{pg}\"'").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Getting info failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + ERRORS += 1 + + print("Test pg logging") + if len(ALLREPPGS + ALLECPGS) == len(OBJREPPGS + OBJECPGS): + logging.warning("All PGs have objects, so no log without modify entries") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op log --pgid {pg}").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Getting log failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + ERRORS += 1 + HASOBJ = pg in OBJREPPGS + OBJECPGS + MODOBJ = False + for line in get_lines(TMPFILE): + if line.find("modify") != -1: + MODOBJ = True + break + if HASOBJ != MODOBJ: + logging.error("Bad log for pg {pg} from {osd}".format(pg=pg, osd=osd)) + MSG = (HASOBJ and [""] or ["NOT "])[0] + print("Log should {msg}have a modify entry".format(msg=MSG)) + ERRORS += 1 + + try: + os.unlink(TMPFILE) + except: + pass + + print("Test list-pgs") + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + + CHECK_PGS = get_osd_pgs(os.path.join(OSDDIR, osd), None) + CHECK_PGS = sorted(CHECK_PGS) + + cmd = (CFSD_PREFIX + "--op list-pgs").format(osd=osd) + logging.debug(cmd) + TEST_PGS = check_output(cmd, shell=True).decode().split("\n") + TEST_PGS = sorted(TEST_PGS)[1:] # Skip extra blank line + + if TEST_PGS != CHECK_PGS: + logging.error("list-pgs got wrong result for osd.{osd}".format(osd=osd)) + 
logging.error("Expected {pgs}".format(pgs=CHECK_PGS)) + logging.error("Got {pgs}".format(pgs=TEST_PGS)) + ERRORS += 1 + + EXP_ERRORS = 0 + print("Test pg export --dry-run") + pg = ALLREPPGS[0] + osd = get_osds(pg, OSDDIR)[0] + fname = "/tmp/fname.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--dry-run --op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + elif os.path.exists(fname): + logging.error("Exporting --dry-run created file") + EXP_ERRORS += 1 + + cmd = (CFSD_PREFIX + "--dry-run --op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + else: + outdata = get_lines(fname) + if len(outdata) > 0: + logging.error("Exporting --dry-run to stdout not empty") + logging.error("Data: " + outdata) + EXP_ERRORS += 1 + + os.mkdir(TESTDIR) + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + os.mkdir(os.path.join(TESTDIR, osd)) + print("Test pg export") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + mydir = os.path.join(TESTDIR, osd) + fname = os.path.join(mydir, pg) + if pg == ALLREPPGS[0]: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname) + elif pg == ALLREPPGS[1]: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file - > {file}").format(osd=osd, pg=pg, file=fname) + else: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + print("Test clear-data-digest") + for nspace in db.keys(): + for basename in db[nspace].keys(): + JSON = db[nspace][basename]['json'] + cmd = (CFSD_PREFIX + "'{json}' clear-data-digest").format(osd='osd0', json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Clearing data digest failed for {json}".format(json=JSON)) + ERRORS += 1 + break + cmd = (CFSD_PREFIX + "'{json}' dump | grep '\"data_digest\": \"0xff'").format(osd='osd0', json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Data digest not cleared for {json}".format(json=JSON)) + ERRORS += 1 + break + break + break + + print("Test pg removal") + RM_ERRORS = 0 + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + # This should do nothing + cmd = (CFSD_PREFIX + "--op remove --pgid {pg} --dry-run").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Removing --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + RM_ERRORS += 1 + cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Removing failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) 
+ RM_ERRORS += 1 + + ERRORS += RM_ERRORS + + IMP_ERRORS = 0 + if EXP_ERRORS == 0 and RM_ERRORS == 0: + print("Test pg import") + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + dir = os.path.join(TESTDIR, osd) + PGS = [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))] + for pg in PGS: + file = os.path.join(dir, pg) + # Make sure this doesn't crash + cmd = (CFSD_PREFIX + "--op dump-export --file {file}").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Dump-export failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + # This should do nothing + cmd = (CFSD_PREFIX + "--op import --file {file} --dry-run").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + if pg == PGS[0]: + cmd = ("cat {file} |".format(file=file) + CFSD_PREFIX + "--op import").format(osd=osd) + elif pg == PGS[1]: + cmd = (CFSD_PREFIX + "--op import --file - --pgid {pg} < {file}").format(osd=osd, file=file, pg=pg) + else: + cmd = (CFSD_PREFIX + "--op import --file {file}").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + else: + logging.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") + + ERRORS += IMP_ERRORS + logging.debug(cmd) + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + print("Verify replicated import data") + data_errors, _ = check_data(DATADIR, TMPFILE, OSDDIR, REP_NAME) + ERRORS += data_errors + else: + logging.warning("SKIPPING CHECKING IMPORT DATA DUE TO PREVIOUS FAILURES") + + print("Test all --op dump-journal again") + ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0] + ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS) + + vstart(new=False) + wait_for_health() + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + print("Verify erasure coded import data") + ERRORS += verify(DATADIR, EC_POOL, EC_NAME, db) + # Check replicated data/xattr/omap using rados + print("Verify replicated import data using rados") + ERRORS += verify(DATADIR, REP_POOL, REP_NAME, db) + + if EXP_ERRORS == 0: + NEWPOOL = "rados-import-pool" + cmd = "{path}/ceph osd pool create {pool} 8".format(pool=NEWPOOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + print("Test rados import") + first = True + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + dir = os.path.join(TESTDIR, osd) + for pg in [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]: + if pg.find("{id}.".format(id=REPID)) != 0: + continue + file = os.path.join(dir, pg) + if first: + first = False + # This should do nothing + cmd = "{path}/rados import -p {pool} --dry-run {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import --dry-run failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + cmd = "{path}/rados -p {pool} ls".format(pool=NEWPOOL, path=CEPH_BIN) + logging.debug(cmd) + data = check_output(cmd, shell=True).decode() + if data: + 
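+                        # 'rados ls' output after a --dry-run import should be empty; any listed object means the dry run modified the pool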
logging.error("'{data}'".format(data=data)) + logging.error("Found objects after dry-run") + ERRORS += 1 + cmd = "{path}/rados import -p {pool} {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + cmd = "{path}/rados import -p {pool} --no-overwrite {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import --no-overwrite failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + + ERRORS += verify(DATADIR, NEWPOOL, REP_NAME, db) + else: + logging.warning("SKIPPING IMPORT-RADOS TESTS DUE TO PREVIOUS FAILURES") + + # Clear directories of previous portion + call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True) + call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True) + os.mkdir(TESTDIR) + os.mkdir(DATADIR) + + # Cause SPLIT_POOL to split and test import with object/log filtering + print("Testing import all objects after a split") + SPLIT_POOL = "split_pool" + PG_COUNT = 1 + SPLIT_OBJ_COUNT = 5 + SPLIT_NSPACE_COUNT = 2 + SPLIT_NAME = "split" + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=SPLIT_POOL, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + SPLITID = get_pool_id(SPLIT_POOL, nullfd) + pool_size = int(check_output("{path}/ceph osd pool get {pool} size".format(pool=SPLIT_POOL, path=CEPH_BIN), shell=True, stderr=nullfd).decode().split(" ")[1]) + EXP_ERRORS = 0 + RM_ERRORS = 0 + IMP_ERRORS = 0 + + objects = range(1, SPLIT_OBJ_COUNT + 1) + nspaces = range(SPLIT_NSPACE_COUNT) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = SPLIT_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the split data for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=SPLIT_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + wait_for_health() + kill_daemons() + + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + os.mkdir(os.path.join(TESTDIR, osd)) + + pg = "{pool}.0".format(pool=SPLITID) + EXPORT_PG = pg + + export_osds = get_osds(pg, OSDDIR) + for osd in export_osds: + mydir = os.path.join(TESTDIR, osd) + fname = os.path.join(mydir, pg) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + if EXP_ERRORS == 0: + vstart(new=False) + wait_for_health() + + cmd = "{path}/ceph osd pool set {pool} pg_num 2".format(pool=SPLIT_POOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, 
stderr=nullfd) + time.sleep(5) + wait_for_health() + + kill_daemons() + + # Now 2 PGs, poolid.0 and poolid.1 + # make note of pgs before we remove the pgs... + osds = get_osds("{pool}.0".format(pool=SPLITID), OSDDIR); + for seed in range(2): + pg = "{pool}.{seed}".format(pool=SPLITID, seed=seed) + + for osd in osds: + cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + + which = 0 + for osd in osds: + # This is weird. The export files are based on only the EXPORT_PG + # and where that pg was before the split. Use 'which' to use all + # export copies in import. + mydir = os.path.join(TESTDIR, export_osds[which]) + fname = os.path.join(mydir, EXPORT_PG) + which += 1 + cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=osd, pg=EXPORT_PG, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + + ERRORS += IMP_ERRORS + + # Start up again to make sure imports didn't corrupt anything + if IMP_ERRORS == 0: + print("Verify split import data") + data_errors, count = check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME) + ERRORS += data_errors + if count != (SPLIT_OBJ_COUNT * SPLIT_NSPACE_COUNT * pool_size): + logging.error("Incorrect number of replicas seen {count}".format(count=count)) + ERRORS += 1 + vstart(new=False) + wait_for_health() + + call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True) + call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True) + + ERRORS += test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS) + + # vstart() starts 4 OSDs + ERRORS += test_get_set_osdmap(CFSD_PREFIX, list(range(4)), ALLOSDS) + ERRORS += test_get_set_inc_osdmap(CFSD_PREFIX, ALLOSDS[0]) + + kill_daemons() + CORES = [f for f in os.listdir(CEPH_DIR) if f.startswith("core.")] + if CORES: + CORE_DIR = os.path.join("/tmp", "cores.{pid}".format(pid=os.getpid())) + os.mkdir(CORE_DIR) + call("/bin/mv {ceph_dir}/core.* {core_dir}".format(ceph_dir=CEPH_DIR, core_dir=CORE_DIR), shell=True) + logging.error("Failure due to cores found") + logging.error("See {core_dir} for cores".format(core_dir=CORE_DIR)) + ERRORS += len(CORES) + + if ERRORS == 0: + print("TEST PASSED") + return 0 + else: + print("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) + return 1 + + +def remove_btrfs_subvolumes(path): + if platform.system() == "FreeBSD": + return + result = subprocess.Popen("stat -f -c '%%T' %s" % path, shell=True, stdout=subprocess.PIPE) + for line in result.stdout: + filesystem = line.decode('utf-8').rstrip('\n') + if filesystem == "btrfs": + result = subprocess.Popen("sudo btrfs subvolume list %s" % path, shell=True, stdout=subprocess.PIPE) + for line in result.stdout: + subvolume = line.decode('utf-8').split()[8] + # extracting the relative volume name + m = re.search(".*(%s.*)" % path, subvolume) + if m: + found = m.group(1) + call("sudo btrfs subvolume delete %s" % found, shell=True) + + +if __name__ == "__main__": + status = 1 + try: + status = main(sys.argv[1:]) + finally: + kill_daemons() + os.chdir(CEPH_BUILD_DIR) + remove_btrfs_subvolumes(CEPH_DIR) + call("/bin/rm -fr {dir}".format(dir=CEPH_DIR), shell=True) + sys.exit(status) diff --git a/qa/standalone/special/test-failure.sh b/qa/standalone/special/test-failure.sh new file mode 100755 index 000000000..cede887d2 --- /dev/null +++ 
b/qa/standalone/special/test-failure.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -ex + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7202" # git grep '\<7202\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_failure_log() { + local dir=$1 + + cat > $dir/test_failure.log << EOF +This is a fake log file +* +* +* +* +* +This ends the fake log file +EOF + + # Test fails + return 1 +} + +function TEST_failure_core_only() { + local dir=$1 + + run_mon $dir a || return 1 + kill_daemons $dir SEGV mon 5 + return 0 +} + +main test_failure "$@" diff --git a/qa/suites/.qa b/qa/suites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/.qa @@ -0,0 +1 @@ +../.qa/
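The test-failure.sh script above follows the standalone-test convention used elsewhere under qa/standalone: run() sets CEPH_MON and CEPH_ARGS, then dispatches every TEST_* function it finds, and a non-zero return (TEST_failure_log) or a deliberately crashed daemon leaving a core (TEST_failure_core_only) marks the job as failed, which appears intended to exercise failure detection itself. A minimal sketch of how such a script is typically exercised locally; the build/ directory layout and the qa/run-standalone.sh wrapper are assumptions about the surrounding source tree, not part of this diff:

    # Hypothetical local run from a completed cmake build directory;
    # run-standalone.sh wires up the helpers and executes the named script.
    cd build
    ../qa/run-standalone.sh test-failure.sh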
\ No newline at end of file diff --git a/qa/suites/big/.qa b/qa/suites/big/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/% b/qa/suites/big/rados-thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/big/rados-thrash/% diff --git a/qa/suites/big/rados-thrash/.qa b/qa/suites/big/rados-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/ceph/.qa b/qa/suites/big/rados-thrash/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/ceph/cephadm.yaml b/qa/suites/big/rados-thrash/ceph/cephadm.yaml new file mode 100644 index 000000000..a225422c5 --- /dev/null +++ b/qa/suites/big/rados-thrash/ceph/cephadm.yaml @@ -0,0 +1,8 @@ +overrides: + kernel: + hwe: true +tasks: +- install: +- nvme_loop: +- cephadm: + roleless: true diff --git a/qa/suites/big/rados-thrash/clusters/.qa b/qa/suites/big/rados-thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/clusters/big.yaml b/qa/suites/big/rados-thrash/clusters/big.yaml new file mode 100644 index 000000000..d9a8de8e7 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/big.yaml @@ -0,0 +1,68 @@ +roles: +- [host.a, client.0] +- [host.b, client.1] +- [host.c, client.2] +- [host.d, client.3] +- [host.e, client.4] +- [host.f, client.5] +- [host.g, client.6] +- [host.h, client.7] +- [host.i, client.8] +- [host.j, client.9] +- [host.k, client.10] +- [host.l, client.11] +- [host.m, client.12] +- [host.n, client.13] +- [host.o, client.14] +- [host.p, client.15] +- [host.q, client.16] +- [host.r, client.17] +- [host.s, client.18] +- [host.t, client.19] +- [host.u, client.20] +- [host.v, client.21] +- [host.w, client.22] +- [host.x, client.23] +- [host.y, client.24] +- [host.z, client.25] +- [host.aa, client.26] +- [host.ab, client.27] +- [host.ac, client.28] +- [host.ad, client.29] +- [host.ae, client.30] +- [host.af, client.31] +- [host.ag, client.32] +- [host.ah, client.33] +- [host.ai, client.34] +- [host.aj, client.35] +- [host.ak, client.36] +- [host.al, client.37] +- [host.am, client.38] +- [host.an, client.39] +- [host.ao, client.40] +- [host.ap, client.41] +- [host.aq, client.42] +- [host.ar, client.43] +- [host.as, client.44] +- [host.at, client.45] +- [host.au, client.46] +- [host.av, client.47] +- [host.aw, client.48] +- [host.ax, client.49] +- [host.ay, client.50] +- [host.az, client.51] +- [host.ba, client.52] +- [host.bb, client.53] +- [host.bc, client.54] +- [host.bd, client.55] +- [host.be, client.56] +- [host.bf, client.57] +- [host.bg, client.58] +- [host.bh, client.59] +- [host.bi, client.60] +- [host.bj, client.61] +- [host.bk, client.62] +- [host.bl, client.63] +- [host.bm, client.64] +- [host.bn, client.65] +- [host.bo, client.66] diff --git a/qa/suites/big/rados-thrash/clusters/medium.yaml b/qa/suites/big/rados-thrash/clusters/medium.yaml new file mode 100644 index 000000000..05a16b053 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/medium.yaml @@ -0,0 +1,22 @@ +roles: +- [host.a, client.0] +- [host.b, client.1] +- [host.c, client.2] +- [host.d, client.3] +- [host.e, client.4] +- [host.f, client.5] +- [host.g, client.6] +- [host.h, client.7] +- [host.i, client.8] +- [host.j, client.9] +- [host.k, client.10] +- [host.l, client.11] +- [host.m, client.12] +- [host.n, client.13] +- [host.o, client.14] +- [host.p, client.15] +- [host.q, client.16] +- [host.r, client.17] +- [host.s, client.18] +- [host.t, client.19] +- [host.u, client.20] diff --git a/qa/suites/big/rados-thrash/clusters/small.yaml b/qa/suites/big/rados-thrash/clusters/small.yaml new file mode 100644 index 000000000..0ff9f33f1 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/small.yaml @@ -0,0 +1,6 @@ +roles: +- [host.a, client.0] +- [host.b, client.1] +- [host.c, client.2] +- [host.d, client.3] +- [host.e, client.4] diff --git a/qa/suites/big/rados-thrash/openstack.yaml b/qa/suites/big/rados-thrash/openstack.yaml new file mode 100644 index 000000000..4d6edcd07 --- /dev/null +++ b/qa/suites/big/rados-thrash/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/big/rados-thrash/workloads/.qa b/qa/suites/big/rados-thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/workloads/radosbench.yaml b/qa/suites/big/rados-thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..60f72886c --- /dev/null +++ b/qa/suites/big/rados-thrash/workloads/radosbench.yaml @@ -0,0 +1,3 @@ +tasks: +- radosbench: + time: 300 diff --git a/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml b/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..b73bb6781 --- /dev/null +++ b/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + ops: 4000 + max_seconds: 3600 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/buildpackages/.qa b/qa/suites/buildpackages/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/.qa @@ -0,0 +1 @@ +../.qa/
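The radosbench.yaml workload above only pins the run time (300 seconds); the teuthology radosbench task supplies the pool and client wiring itself. As a rough illustration of what each client ends up driving, under the assumption that the task wraps the stock rados bench CLI; the pool name below is made up:

    # Approximation of the per-client benchmark invocation; the real task
    # creates and cleans up its own pool and may add extra flags.
    rados -p bench-pool bench 300 write --no-cleanup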
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/% b/qa/suites/buildpackages/any/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/buildpackages/any/% diff --git a/qa/suites/buildpackages/any/.qa b/qa/suites/buildpackages/any/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/any/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/distros b/qa/suites/buildpackages/any/distros new file mode 120000 index 000000000..0e1f13037 --- /dev/null +++ b/qa/suites/buildpackages/any/distros @@ -0,0 +1 @@ +.qa/distros/all
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/tasks/.qa b/qa/suites/buildpackages/any/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/any/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/tasks/release.yaml b/qa/suites/buildpackages/any/tasks/release.yaml new file mode 100644 index 000000000..d7a3b62c8 --- /dev/null +++ b/qa/suites/buildpackages/any/tasks/release.yaml @@ -0,0 +1,8 @@ +# --suite buildpackages/any --ceph v10.0.1 --filter centos_7,ubuntu_14.04 +roles: + - [client.0] +tasks: + - install: + - exec: + client.0: + - ceph --version | grep 'version ' diff --git a/qa/suites/buildpackages/tests/% b/qa/suites/buildpackages/tests/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/buildpackages/tests/% diff --git a/qa/suites/buildpackages/tests/.qa b/qa/suites/buildpackages/tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/tests/distros b/qa/suites/buildpackages/tests/distros new file mode 120000 index 000000000..0e1f13037 --- /dev/null +++ b/qa/suites/buildpackages/tests/distros @@ -0,0 +1 @@ +.qa/distros/all
\ No newline at end of file diff --git a/qa/suites/buildpackages/tests/tasks/.qa b/qa/suites/buildpackages/tests/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/tests/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/tests/tasks/release.yaml b/qa/suites/buildpackages/tests/tasks/release.yaml new file mode 100644 index 000000000..05e87789d --- /dev/null +++ b/qa/suites/buildpackages/tests/tasks/release.yaml @@ -0,0 +1,20 @@ +# --suite buildpackages/tests --ceph v10.0.1 --filter centos_7.2,ubuntu_14.04 +overrides: + ansible.cephlab: + playbook: users.yml + buildpackages: + good_machine: + disk: 20 # GB + ram: 2000 # MB + cpus: 2 + min_machine: + disk: 10 # GB + ram: 1000 # MB + cpus: 1 +roles: + - [client.0] +tasks: + - install: + - exec: + client.0: + - ceph --version | grep 'version ' diff --git a/qa/suites/ceph-ansible/.qa b/qa/suites/ceph-ansible/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/.qa @@ -0,0 +1 @@ +../.qa/
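The commented scheduling hint at the top of release.yaml above (--suite buildpackages/tests --ceph v10.0.1 --filter centos_7.2,ubuntu_14.04) is an argument fragment for teuthology-suite. A hedged example of a full command built around it; the machine type is a placeholder and any lab-specific flags are assumptions:

    # Sketch only: --suite/--ceph/--filter come from the yaml comment,
    # --machine-type stands in for whatever the local lab provides.
    teuthology-suite --suite buildpackages/tests --ceph v10.0.1 \
        --filter centos_7.2,ubuntu_14.04 --machine-type smithi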
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/.qa b/qa/suites/ceph-ansible/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/% b/qa/suites/ceph-ansible/smoke/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/% diff --git a/qa/suites/ceph-ansible/smoke/basic/.qa b/qa/suites/ceph-ansible/smoke/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/0-clusters/.qa b/qa/suites/ceph-ansible/smoke/basic/0-clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/0-clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/0-clusters/3-node.yaml b/qa/suites/ceph-ansible/smoke/basic/0-clusters/3-node.yaml new file mode 100644 index 000000000..86dd366b9 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/0-clusters/3-node.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + 3-node cluster + install and run ceph-ansible on a mon.a node alone with ceph +roles: +- [mon.a, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [mon.c, mgr.y, osd.6, osd.7, osd.8, client.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/ceph-ansible/smoke/basic/0-clusters/4-node.yaml b/qa/suites/ceph-ansible/smoke/basic/0-clusters/4-node.yaml new file mode 100644 index 000000000..b1754432c --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/0-clusters/4-node.yaml @@ -0,0 +1,13 @@ +meta: +- desc: | + 4-node cluster + install and run ceph-ansible on installer.0 stand alone node +roles: +- [mon.a, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [mon.c, mgr.y, osd.6, osd.7, osd.8, client.0] +- [installer.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/ceph-ansible/smoke/basic/1-distros/.qa b/qa/suites/ceph-ansible/smoke/basic/1-distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/1-distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/1-distros/centos_latest.yaml b/qa/suites/ceph-ansible/smoke/basic/1-distros/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/1-distros/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/1-distros/ubuntu_latest.yaml b/qa/suites/ceph-ansible/smoke/basic/1-distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/1-distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/2-ceph/.qa b/qa/suites/ceph-ansible/smoke/basic/2-ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/2-ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml new file mode 100644 index 000000000..7e7ede3e3 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml @@ -0,0 +1,36 @@ +meta: +- desc: "Build the ceph cluster using ceph-ansible" + +overrides: + ceph_ansible: + ansible-version: '2.9' + vars: + ceph_conf_overrides: + global: + osd default pool size: 2 + mon pg warn min per osd: 2 + osd pool default pg num: 64 + osd pool default pgp num: 64 + mon_max_pg_per_osd: 1024 + ceph_test: true + ceph_stable_release: nautilus + osd_scenario: lvm + journal_size: 1024 + osd_auto_discovery: false + ceph_origin: repository + ceph_repository: dev + ceph_mgr_modules: + - status + - restful + cephfs_pools: + - name: "cephfs_data" + pg_num: "64" + rule_name: "replicated_rule" + - name: "cephfs_metadata" + pg_num: "64" + rule_name: "replicated_rule" + dashboard_enabled: false +tasks: +- ssh-keys: +- ceph_ansible: +- install.ship_utilities: diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/.qa b/qa/suites/ceph-ansible/smoke/basic/3-config/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/bluestore_with_dmcrypt.yaml b/qa/suites/ceph-ansible/smoke/basic/3-config/bluestore_with_dmcrypt.yaml new file mode 100644 index 000000000..604e757ad --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/bluestore_with_dmcrypt.yaml @@ -0,0 +1,8 @@ +meta: +- desc: "use bluestore + dmcrypt option" + +overrides: + ceph_ansible: + vars: + osd_objectstore: bluestore + dmcrypt: True diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_off.yaml b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_off.yaml new file mode 100644 index 000000000..4bbd1c7c5 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_off.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "without dmcrypt" + +overrides: + ceph_ansible: + vars: + dmcrypt: False diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_on.yaml b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_on.yaml new file mode 100644 index 000000000..12d63d325 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_on.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "use dmcrypt option" + +overrides: + ceph_ansible: + vars: + dmcrypt: True diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/.qa b/qa/suites/ceph-ansible/smoke/basic/4-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/ceph-admin-commands.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/ceph-admin-commands.yaml new file mode 100644 index 000000000..33642d5cf --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/ceph-admin-commands.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "Run ceph-admin-commands.sh" +tasks: +- workunit: + clients: + client.0: + - ceph-tests/ceph-admin-commands.sh diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rbd_import_export.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rbd_import_export.yaml new file mode 100644 index 000000000..9495934e6 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rbd_import_export.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "Run the rbd import/export tests" +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml new file mode 100644 index 000000000..8e389134b --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml @@ -0,0 +1,15 @@ +tasks: +- exec: + mgr.x: + - systemctl stop ceph-mgr.target + - sleep 5 + - ceph -s +- exec: + mon.a: + - ceph restful create-key admin + - ceph restful create-self-signed-cert + - ceph restful restart +- workunit: + clients: + client.0: + - rest/test-restful.sh diff --git a/qa/suites/ceph-deploy/% b/qa/suites/ceph-deploy/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/ceph-deploy/% diff --git a/qa/suites/ceph-deploy/.qa b/qa/suites/ceph-deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/cluster/.qa b/qa/suites/ceph-deploy/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/cluster/4node.yaml b/qa/suites/ceph-deploy/cluster/4node.yaml new file mode 100644 index 000000000..bf4a7f986 --- /dev/null +++ b/qa/suites/ceph-deploy/cluster/4node.yaml @@ -0,0 +1,15 @@ +overrides: + ansible.cephlab: + vars: + quick_lvs_to_create: 4 +openstack: + - machine: + disk: 10 + volumes: + count: 4 + size: 20 +roles: +- [mon.a, mgr.y, osd.0, osd.1] +- [mon.b, osd.2, osd.3] +- [mon.c, osd.4, osd.5] +- [mgr.x, client.0] diff --git a/qa/suites/ceph-deploy/config/.qa b/qa/suites/ceph-deploy/config/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/config/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/config/ceph_volume_bluestore.yaml b/qa/suites/ceph-deploy/config/ceph_volume_bluestore.yaml new file mode 100644 index 000000000..e484e6120 --- /dev/null +++ b/qa/suites/ceph-deploy/config/ceph_volume_bluestore.yaml @@ -0,0 +1,7 @@ +overrides: + ceph-deploy: + use-ceph-volume: True + bluestore: True + conf: + osd: + bluestore fsck on mount: true diff --git a/qa/suites/ceph-deploy/config/ceph_volume_bluestore_dmcrypt.yaml b/qa/suites/ceph-deploy/config/ceph_volume_bluestore_dmcrypt.yaml new file mode 100644 index 000000000..d424b6423 --- /dev/null +++ b/qa/suites/ceph-deploy/config/ceph_volume_bluestore_dmcrypt.yaml @@ -0,0 +1,8 @@ +overrides: + ceph-deploy: + use-ceph-volume: True + bluestore: True + dmcrypt: True + conf: + osd: + bluestore fsck on mount: true diff --git a/qa/suites/ceph-deploy/config/ceph_volume_dmcrypt_off.yaml b/qa/suites/ceph-deploy/config/ceph_volume_dmcrypt_off.yaml new file mode 100644 index 000000000..097014414 --- /dev/null +++ b/qa/suites/ceph-deploy/config/ceph_volume_dmcrypt_off.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + use-ceph-volume: True diff --git a/qa/suites/ceph-deploy/distros/.qa b/qa/suites/ceph-deploy/distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/distros/centos_latest.yaml b/qa/suites/ceph-deploy/distros/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/ceph-deploy/distros/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/distros/ubuntu_latest.yaml b/qa/suites/ceph-deploy/distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/ceph-deploy/distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/python_versions/.qa b/qa/suites/ceph-deploy/python_versions/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/python_versions/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/python_versions/python_2.yaml b/qa/suites/ceph-deploy/python_versions/python_2.yaml new file mode 100644 index 000000000..51c865bfa --- /dev/null +++ b/qa/suites/ceph-deploy/python_versions/python_2.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + python_version: "2" diff --git a/qa/suites/ceph-deploy/python_versions/python_3.yaml b/qa/suites/ceph-deploy/python_versions/python_3.yaml new file mode 100644 index 000000000..22deecaea --- /dev/null +++ b/qa/suites/ceph-deploy/python_versions/python_3.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + python_version: "3" diff --git a/qa/suites/ceph-deploy/tasks/.qa b/qa/suites/ceph-deploy/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/tasks/ceph-admin-commands.yaml b/qa/suites/ceph-deploy/tasks/ceph-admin-commands.yaml new file mode 100644 index 000000000..b7dbfe1ae --- /dev/null +++ b/qa/suites/ceph-deploy/tasks/ceph-admin-commands.yaml @@ -0,0 +1,12 @@ +meta: +- desc: "test basic ceph admin commands" +tasks: +- ssh_keys: +- print: "**** done ssh_keys" +- ceph-deploy: +- print: "**** done ceph-deploy" +- workunit: + clients: + client.0: + - ceph-tests/ceph-admin-commands.sh +- print: "**** done ceph-tests/ceph-admin-commands.sh" diff --git a/qa/suites/ceph-deploy/tasks/rbd_import_export.yaml b/qa/suites/ceph-deploy/tasks/rbd_import_export.yaml new file mode 100644 index 000000000..1c09735a6 --- /dev/null +++ b/qa/suites/ceph-deploy/tasks/rbd_import_export.yaml @@ -0,0 +1,9 @@ +meta: +- desc: "Setup cluster using ceph-deploy, Run the rbd import/export tests" +tasks: +- ssh-keys: +- ceph-deploy: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/cephmetrics/% b/qa/suites/cephmetrics/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/cephmetrics/% diff --git a/qa/suites/cephmetrics/.qa b/qa/suites/cephmetrics/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/0-clusters/.qa b/qa/suites/cephmetrics/0-clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/0-clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/0-clusters/3-node.yaml b/qa/suites/cephmetrics/0-clusters/3-node.yaml new file mode 100644 index 000000000..3935e7cc3 --- /dev/null +++ b/qa/suites/cephmetrics/0-clusters/3-node.yaml @@ -0,0 +1,11 @@ +meta: +- desc: "4-node cluster" +roles: +- [mon.a, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [mon.c, mgr.y, osd.6, osd.7, osd.8, client.0] +- [cephmetrics.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/cephmetrics/1-distros/.qa b/qa/suites/cephmetrics/1-distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/1-distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/1-distros/centos_latest.yaml b/qa/suites/cephmetrics/1-distros/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/cephmetrics/1-distros/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/cephmetrics/1-distros/ubuntu_latest.yaml b/qa/suites/cephmetrics/1-distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/cephmetrics/1-distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/cephmetrics/2-ceph/.qa b/qa/suites/cephmetrics/2-ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/2-ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml new file mode 100644 index 000000000..309f50600 --- /dev/null +++ b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml @@ -0,0 +1,32 @@ +meta: +- desc: "Build the ceph cluster using ceph-ansible" + +overrides: + ceph_ansible: + vars: + ceph_conf_overrides: + global: + osd default pool size: 2 + mon pg warn min per osd: 2 + osd pool default pg num: 64 + osd pool default pgp num: 64 + mon_max_pg_per_osd: 1024 + ceph_test: true + ceph_stable_release: luminous + osd_scenario: collocated + journal_size: 1024 + osd_auto_discovery: false + ceph_origin: repository + ceph_repository: dev + ceph_mgr_modules: + - status + - restful + cephfs_pools: + - name: "cephfs_data" + pg_num: "64" + - name: "cephfs_metadata" + pg_num: "64" +tasks: +- ssh-keys: +- ceph_ansible: +- install.ship_utilities: diff --git a/qa/suites/cephmetrics/3-ceph-config/.qa b/qa/suites/cephmetrics/3-ceph-config/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/3-ceph-config/bluestore_with_dmcrypt.yaml b/qa/suites/cephmetrics/3-ceph-config/bluestore_with_dmcrypt.yaml new file mode 100644 index 000000000..16db8ab27 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/bluestore_with_dmcrypt.yaml @@ -0,0 +1,8 @@ +meta: +- desc: "use bluestore + dmcrypt" + +overrides: + ceph_ansible: + vars: + osd_objectstore: bluestore + dmcrypt: True diff --git a/qa/suites/cephmetrics/3-ceph-config/bluestore_without_dmcrypt.yaml b/qa/suites/cephmetrics/3-ceph-config/bluestore_without_dmcrypt.yaml new file mode 100644 index 000000000..fc879fc8e --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/bluestore_without_dmcrypt.yaml @@ -0,0 +1,8 @@ +meta: +- desc: "use bluestore without dmcrypt" + +overrides: + ceph_ansible: + vars: + osd_objectstore: bluestore + dmcrypt: False diff --git a/qa/suites/cephmetrics/3-ceph-config/dmcrypt_off.yaml b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_off.yaml new file mode 100644 index 000000000..4bbd1c7c5 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_off.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "without dmcrypt" + +overrides: + ceph_ansible: + vars: + dmcrypt: False diff --git a/qa/suites/cephmetrics/3-ceph-config/dmcrypt_on.yaml b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_on.yaml new file mode 100644 index 000000000..519ad1d79 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_on.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "with dmcrypt" + +overrides: + ceph_ansible: + vars: + dmcrypt: True diff --git a/qa/suites/cephmetrics/4-epel/.qa b/qa/suites/cephmetrics/4-epel/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/4-epel/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/4-epel/no_epel.yaml b/qa/suites/cephmetrics/4-epel/no_epel.yaml new file mode 100644 index 000000000..1538fd7f8 --- /dev/null +++ b/qa/suites/cephmetrics/4-epel/no_epel.yaml @@ -0,0 +1,7 @@ +meta: + - desc: "Without EPEL" +overrides: + cephmetrics: + group_vars: + all: + use_epel: false diff --git a/qa/suites/cephmetrics/4-epel/use_epel.yaml b/qa/suites/cephmetrics/4-epel/use_epel.yaml new file mode 100644 index 000000000..d496a43ea --- /dev/null +++ b/qa/suites/cephmetrics/4-epel/use_epel.yaml @@ -0,0 +1,7 @@ +meta: + - desc: "Using EPEL" +overrides: + cephmetrics: + group_vars: + all: + use_epel: true diff --git a/qa/suites/cephmetrics/5-containers/.qa b/qa/suites/cephmetrics/5-containers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/5-containers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/5-containers/containerized.yaml b/qa/suites/cephmetrics/5-containers/containerized.yaml new file mode 100644 index 000000000..686de08a2 --- /dev/null +++ b/qa/suites/cephmetrics/5-containers/containerized.yaml @@ -0,0 +1,10 @@ +meta: + - desc: "Containerized prometheus and grafana" +overrides: + cephmetrics: + group_vars: + all: + prometheus: + containerized: true + grafana: + containerized: true diff --git a/qa/suites/cephmetrics/5-containers/no_containers.yaml b/qa/suites/cephmetrics/5-containers/no_containers.yaml new file mode 100644 index 000000000..29c690939 --- /dev/null +++ b/qa/suites/cephmetrics/5-containers/no_containers.yaml @@ -0,0 +1,10 @@ +meta: + - desc: "Packaged prometheus and grafana" +overrides: + cephmetrics: + group_vars: + all: + prometheus: + containerized: false + grafana: + containerized: false diff --git a/qa/suites/cephmetrics/6-tasks/.qa b/qa/suites/cephmetrics/6-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/6-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/6-tasks/cephmetrics.yaml b/qa/suites/cephmetrics/6-tasks/cephmetrics.yaml new file mode 100644 index 000000000..15f90394c --- /dev/null +++ b/qa/suites/cephmetrics/6-tasks/cephmetrics.yaml @@ -0,0 +1,4 @@ +meta: +- desc: "Deploy cephmetrics and run integration tests" +tasks: +- cephmetrics: diff --git a/qa/suites/crimson-rados-experimental/.qa b/qa/suites/crimson-rados-experimental/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/crimson-rados-experimental/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/.qa b/qa/suites/crimson-rados-experimental/seastore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/% b/qa/suites/crimson-rados-experimental/seastore/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/% diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml new file mode 100644 index 000000000..d8e5898b9 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml @@ -0,0 +1,14 @@ +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 + ceph: + conf: + osd: + osd shutdown pgref assert: true +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml new file mode 100644 index 000000000..9774de688 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, osd.0, osd.1, client.0, node-exporter.a] +- [mgr.x, osd.2, osd.3, client.1, prometheus.a, node-exporter.b] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml new file mode 100644 index 000000000..c22f08eec --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + flavor: crimson diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml new file mode 100644 index 000000000..713d93225 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd objectstore: seastore diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..ad8c92142 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - but it is still running + - overall HEALTH_ + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + conf: + client: + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" + osd blocked scrub grace period: 3600 +tasks: +- workunit: + clients: + client.0: + - rados/test.sh + - rados/test_pool_quota.sh diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml new file mode 100644 index 000000000..f135107c7 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + osd: + osd_discard_disconnected_ops: false +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/crimson-rados/.qa b/qa/suites/crimson-rados/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/% b/qa/suites/crimson-rados/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/basic/% diff --git a/qa/suites/crimson-rados/basic/.qa b/qa/suites/crimson-rados/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/centos_8.stream.yaml b/qa/suites/crimson-rados/basic/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/crimson-rados/basic/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/clusters/.qa b/qa/suites/crimson-rados/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/clusters/fixed-2.yaml b/qa/suites/crimson-rados/basic/clusters/fixed-2.yaml new file mode 100644 index 000000000..9774de688 --- /dev/null +++ b/qa/suites/crimson-rados/basic/clusters/fixed-2.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, osd.0, osd.1, client.0, node-exporter.a] +- [mgr.x, osd.2, osd.3, client.1, prometheus.a, node-exporter.b] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc diff --git a/qa/suites/crimson-rados/basic/crimson-supported-all-distro b/qa/suites/crimson-rados/basic/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/basic/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/basic/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/basic/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/deploy/.qa b/qa/suites/crimson-rados/basic/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/deploy/ceph.yaml b/qa/suites/crimson-rados/basic/deploy/ceph.yaml new file mode 100644 index 000000000..c22f08eec --- /dev/null +++ b/qa/suites/crimson-rados/basic/deploy/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + flavor: crimson diff --git a/qa/suites/crimson-rados/basic/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados/basic/deploy/cephadm.yaml.disabled new file mode 100644 index 000000000..0c2062240 --- /dev/null +++ b/qa/suites/crimson-rados/basic/deploy/cephadm.yaml.disabled @@ -0,0 +1,16 @@ +# no need to verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 + debug osd: 10 +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/crimson-rados/basic/tasks/.qa b/qa/suites/crimson-rados/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml b/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..ad8c92142 --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - but it is still running + - overall HEALTH_ + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + conf: + client: + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" + osd blocked scrub grace period: 3600 +tasks: +- workunit: + clients: + client.0: + - rados/test.sh + - rados/test_pool_quota.sh diff --git a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml new file mode 100644 index 000000000..aa8719d9f --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + timeout: 1h + clients: + client.0: + - rados/test_python.sh -m 'not (wait or tier or ec or bench or stats)' diff --git a/qa/suites/crimson-rados/basic/tasks/readwrite.yaml b/qa/suites/crimson-rados/basic/tasks/readwrite.yaml new file mode 100644 index 000000000..f135107c7 --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/readwrite.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + osd: + osd_discard_disconnected_ops: false +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/crimson-rados/rbd/% b/qa/suites/crimson-rados/rbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/rbd/% diff --git a/qa/suites/crimson-rados/rbd/.qa b/qa/suites/crimson-rados/rbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/centos_8.stream.yaml b/qa/suites/crimson-rados/rbd/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/clusters/.qa b/qa/suites/crimson-rados/rbd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/clusters/fixed-1.yaml b/qa/suites/crimson-rados/rbd/clusters/fixed-1.yaml new file mode 100644 index 000000000..d204f3eb2 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/clusters/fixed-1.yaml @@ -0,0 +1,21 @@ +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] diff --git a/qa/suites/crimson-rados/rbd/crimson-supported-all-distro b/qa/suites/crimson-rados/rbd/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/rbd/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/rbd/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/rbd/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/deploy/.qa b/qa/suites/crimson-rados/rbd/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/deploy/ceph.yaml b/qa/suites/crimson-rados/rbd/deploy/ceph.yaml new file mode 100644 index 000000000..c22f08eec --- /dev/null +++ b/qa/suites/crimson-rados/rbd/deploy/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + flavor: crimson diff --git a/qa/suites/crimson-rados/rbd/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados/rbd/deploy/cephadm.yaml.disabled new file mode 100644 index 000000000..0c2062240 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/deploy/cephadm.yaml.disabled @@ -0,0 +1,16 @@ +# no need to verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 + debug osd: 10 +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/crimson-rados/rbd/tasks/.qa b/qa/suites/crimson-rados/rbd/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests.yaml new file mode 100644 index 000000000..b0d019bbe --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + - is full \(reached quota + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/crimson/test_crimson_librbd.sh + env: + RBD_FEATURES: "61" + diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests_old_format.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests_old_format.yaml new file mode 100644 index 000000000..a6b85f7b3 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests_old_format.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/crimson/test_crimson_librbd.sh diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_cls_tests.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_cls_tests.yaml new file mode 100644 index 000000000..51b35e2e1 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_cls_tests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh + - cls/test_cls_lock.sh + - cls/test_cls_journal.sh diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_lock_and_fence.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_lock_and_fence.yaml new file mode 100644 index 000000000..d2c80ad65 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_lock_and_fence.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_lock_fence.sh diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests.yaml new file mode 100644 index 000000000..fd65589e0 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + log-ignorelist: + - \(SLOW_OPS\) + - slow request + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh -m 'not skip_if_crimson' + env: + RBD_FEATURES: "61" diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests_old_format.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests_old_format.yaml new file mode 100644 index 000000000..95140de6d --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests_old_format.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - \(SLOW_OPS\) + - slow request + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh -m 'not skip_if_crimson' diff --git a/qa/suites/crimson-rados/singleton/% b/qa/suites/crimson-rados/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/singleton/% diff --git a/qa/suites/crimson-rados/singleton/.qa b/qa/suites/crimson-rados/singleton/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/crimson-rados/singleton/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/all/.qa b/qa/suites/crimson-rados/singleton/all/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/crimson-rados/singleton/all/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/all/osd-backfill.yaml b/qa/suites/crimson-rados/singleton/all/osd-backfill.yaml new file mode 100644 index 000000000..f475d5dc3 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/all/osd-backfill.yaml @@ -0,0 +1,29 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: + flavor: crimson +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + conf: + osd: + osd min pg log entries: 5 +- osd_backfill: diff --git a/qa/suites/crimson-rados/singleton/crimson-supported-all-distro b/qa/suites/crimson-rados/singleton/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/singleton/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/singleton/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/singleton/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/objectstore b/qa/suites/crimson-rados/singleton/objectstore new file mode 120000 index 000000000..dbccf5ad9 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/objectstore @@ -0,0 +1 @@ +../thrash/objectstore
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/rados.yaml b/qa/suites/crimson-rados/singleton/rados.yaml new file mode 120000 index 000000000..e95c99ef2 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/rados.yaml @@ -0,0 +1 @@ +./.qa/suites/rados/singleton/rados.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/% b/qa/suites/crimson-rados/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/% diff --git a/qa/suites/crimson-rados/thrash/.qa b/qa/suites/crimson-rados/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/.qa b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled new file mode 120000 index 000000000..5393a7554 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 000000000..5ff70eadf --- /dev/null +++ b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/.qa b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/normal_pg_log.yaml b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/normal_pg_log.yaml diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled new file mode 120000 index 000000000..abd86d7d9 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/short_pg_log.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/$ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/$ diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/.qa b/qa/suites/crimson-rados/thrash/2-recovery-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/default.yaml b/qa/suites/crimson-rados/thrash/2-recovery-overrides/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/default.yaml diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled new file mode 120000 index 000000000..47afd7020 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/more-active-recovery.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled new file mode 100644 index 000000000..0bbc72db7 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled new file mode 100644 index 000000000..4aed086bc --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled new file mode 100644 index 000000000..88f15f2f6 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/crimson-rados/thrash/centos_8.stream.yaml b/qa/suites/crimson-rados/thrash/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/clusters/+ b/qa/suites/crimson-rados/thrash/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/+ diff --git a/qa/suites/crimson-rados/thrash/clusters/.qa b/qa/suites/crimson-rados/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/clusters/fixed-2.yaml b/qa/suites/crimson-rados/thrash/clusters/fixed-2.yaml new file mode 100644 index 000000000..9774de688 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/fixed-2.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, osd.0, osd.1, client.0, node-exporter.a] +- [mgr.x, osd.2, osd.3, client.1, prometheus.a, node-exporter.b] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc diff --git a/qa/suites/crimson-rados/thrash/clusters/openstack.yaml.disabled b/qa/suites/crimson-rados/thrash/clusters/openstack.yaml.disabled new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/openstack.yaml.disabled @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/crimson-rados/thrash/crimson-supported-all-distro b/qa/suites/crimson-rados/thrash/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/thrash/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
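The six "ms ... mode: crc" settings in clusters/fixed-2.yaml above pin every msgr2 connection class to crc mode. Below is a commented restatement of that stanza, assuming the usual msgr2 semantics (crc = checksummed but unencrypted traffic, as opposed to secure):

    overrides:
      ceph:
        conf:
          global:
            # force crc mode on all msgr2 connection classes: OSD-to-OSD
            # cluster traffic, the daemon-facing service side, the client
            # side, and the three monitor equivalents
            ms cluster mode: crc
            ms service mode: crc
            ms client mode: crc
            ms mon service mode: crc
            ms mon cluster mode: crc
            ms mon client mode: crc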
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/thrash/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/thrash/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/deploy/.qa b/qa/suites/crimson-rados/thrash/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/deploy/ceph.yaml b/qa/suites/crimson-rados/thrash/deploy/ceph.yaml new file mode 100644 index 000000000..ecad09cfe --- /dev/null +++ b/qa/suites/crimson-rados/thrash/deploy/ceph.yaml @@ -0,0 +1,11 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + flavor: crimson diff --git a/qa/suites/crimson-rados/thrash/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados/thrash/deploy/cephadm.yaml.disabled new file mode 100644 index 000000000..0c2062240 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/deploy/cephadm.yaml.disabled @@ -0,0 +1,16 @@ +# no need to verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 + debug osd: 10 +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/crimson-rados/thrash/objectstore/.qa b/qa/suites/crimson-rados/thrash/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml new file mode 100644 index 000000000..99c532f11 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: snappy + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bluestore rocksdb cf: false + log to stderr: true + err to stderr: true + log flush on exit: true + log to file: false diff --git a/qa/suites/crimson-rados/thrash/thrashers/.qa b/qa/suites/crimson-rados/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
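In objectstore/bluestore.yaml above, the block size works out to 90 GiB per OSD (96636764160 / 2^30 = 90, roughly the "100gb osd" the in-file comment mentions), which is why the same fragment also lowers the full, backfillfull and nearfull ratios. A commented excerpt of just those keys:

    overrides:
      ceph:
        conf:
          osd:
            # 96636764160 bytes = 90 GiB per OSD, so the lowered ratios
            # below trip long before a production-sized disk would
            bluestore block size: 96636764160
            mon osd full ratio: .9
            mon osd backfillfull_ratio: .85
            mon osd nearfull ratio: .8
            osd failsafe full ratio: .95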
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/thrashers/default.yaml b/qa/suites/crimson-rados/thrash/thrashers/default.yaml new file mode 100644 index 000000000..5ffbcbd7f --- /dev/null +++ b/qa/suites/crimson-rados/thrash/thrashers/default.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + wait-for-scrub: false + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 2400 + dump_ops_enable: false + sighup_delay: 0 + min_in: 3 + noscrub_toggle_delay: 0 + chance_down: 0 + chance_thrash_pg_upmap: 0 + reweight_osd: 0 + thrash_primary_affinity: false + ceph_objectstore_tool: false + chance_inject_pause_short: 0 + chance_thrash_cluster_full: 0 diff --git a/qa/suites/crimson-rados/thrash/thrashosds-health.yaml b/qa/suites/crimson-rados/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/crimson-rados/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/workloads/.qa b/qa/suites/crimson-rados/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/workloads/admin_socket_objecter_requests.yaml b/qa/suites/crimson-rados/thrash/workloads/admin_socket_objecter_requests.yaml new file mode 100644 index 000000000..8c9764ade --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/admin_socket_objecter_requests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +tasks: +- radosbench: + clients: [client.0] + time: 150 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/crimson-rados/thrash/workloads/pool-snaps-few-objects.yaml b/qa/suites/crimson-rados/thrash/workloads/pool-snaps-few-objects.yaml new file mode 100644 index 000000000..fff5cda6e --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/pool-snaps-few-objects.yaml @@ -0,0 +1,20 @@ +override: + conf: + osd: + osd deep scrub update digest min age: 0 +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + diff --git a/qa/suites/crimson-rados/thrash/workloads/radosbench-high-concurrency.yaml b/qa/suites/crimson-rados/thrash/workloads/radosbench-high-concurrency.yaml new file mode 100644 index 000000000..902c4b56a --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/radosbench-high-concurrency.yaml @@ -0,0 +1,49 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 diff --git a/qa/suites/crimson-rados/thrash/workloads/radosbench.yaml b/qa/suites/crimson-rados/thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..071f55e39 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/radosbench.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 diff --git a/qa/suites/crimson-rados/thrash/workloads/small-objects-balanced.yaml b/qa/suites/crimson-rados/thrash/workloads/small-objects-balanced.yaml new file mode 100644 index 000000000..0c50dc136 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/small-objects-balanced.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + 
snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/crimson-rados/thrash/workloads/small-objects-localized.yaml b/qa/suites/crimson-rados/thrash/workloads/small-objects-localized.yaml new file mode 100644 index 000000000..df5c114f1 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/small-objects-localized.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/crimson-rados/thrash/workloads/small-objects.yaml b/qa/suites/crimson-rados/thrash/workloads/small-objects.yaml new file mode 100644 index 000000000..32928c303 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/small-objects.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-balanced.yaml b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-balanced.yaml new file mode 100644 index 000000000..1161c3cc2 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-balanced.yaml @@ -0,0 +1,15 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-localized.yaml b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-localized.yaml new file mode 100644 index 000000000..80af0def0 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-localized.yaml @@ -0,0 +1,15 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects.yaml b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..0694ffcd0 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,14 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados/thrash/workloads/write_fadvise_dontneed.yaml b/qa/suites/crimson-rados/thrash/workloads/write_fadvise_dontneed.yaml new file mode 100644 index 000000000..606dcae69 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/write_fadvise_dontneed.yaml @@ -0,0 +1,8 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_fadvise_dontneed: true + op_weights: + write: 100 diff --git a/qa/suites/dummy/% b/qa/suites/dummy/% new file mode 100644 index 
000000000..e69de29bb --- /dev/null +++ b/qa/suites/dummy/% diff --git a/qa/suites/dummy/.qa b/qa/suites/dummy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/dummy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/dummy/all/.qa b/qa/suites/dummy/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/dummy/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/dummy/all/nop.yaml b/qa/suites/dummy/all/nop.yaml new file mode 100644 index 000000000..0f00ffc88 --- /dev/null +++ b/qa/suites/dummy/all/nop.yaml @@ -0,0 +1,6 @@ +roles: + - [mon.a, mgr.x, mds.a, osd.0, osd.1, client.0] + +tasks: + - nop: + diff --git a/qa/suites/experimental/.qa b/qa/suites/experimental/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/% b/qa/suites/experimental/multimds/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/experimental/multimds/% diff --git a/qa/suites/experimental/multimds/.qa b/qa/suites/experimental/multimds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/multimds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/clusters/.qa b/qa/suites/experimental/multimds/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/multimds/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/clusters/7-multimds.yaml b/qa/suites/experimental/multimds/clusters/7-multimds.yaml new file mode 100644 index 000000000..7b2763f19 --- /dev/null +++ b/qa/suites/experimental/multimds/clusters/7-multimds.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, mds.a, mds.d] +- [mon.b, mgr.y, mds.b, mds.e] +- [mon.c, mgr.z, mds.c, mds.f] +- [osd.0] +- [osd.1] +- [osd.2] +- [client.0] diff --git a/qa/suites/experimental/multimds/tasks/.qa b/qa/suites/experimental/multimds/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/multimds/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml b/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml new file mode 100644 index 000000000..bee01a835 --- /dev/null +++ b/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: + conf: + mds: + mds thrash exports: 1 + mds debug subtrees: 1 + mds debug scatterstat: 1 + mds verify scatter: 1 +- ceph-fuse: +- workunit: + clients: + client.0: + - suites/fsstress.sh + diff --git a/qa/suites/fs/.qa b/qa/suites/fs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/% b/qa/suites/fs/32bits/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/32bits/% diff --git a/qa/suites/fs/32bits/.qa b/qa/suites/fs/32bits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/begin b/qa/suites/fs/32bits/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/32bits/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/clusters/.qa b/qa/suites/fs/32bits/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/clusters/fixed-2-ucephfs.yaml b/qa/suites/fs/32bits/clusters/fixed-2-ucephfs.yaml new file mode 120000 index 000000000..b0c41a89a --- /dev/null +++ b/qa/suites/fs/32bits/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/fixed-2-ucephfs.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/conf b/qa/suites/fs/32bits/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/32bits/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/32bits/distro b/qa/suites/fs/32bits/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/32bits/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/32bits/mount/.qa b/qa/suites/fs/32bits/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/mount/fuse.yaml b/qa/suites/fs/32bits/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/32bits/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/objectstore-ec b/qa/suites/fs/32bits/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/32bits/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/32bits/overrides/+ b/qa/suites/fs/32bits/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/32bits/overrides/+ diff --git a/qa/suites/fs/32bits/overrides/.qa b/qa/suites/fs/32bits/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/overrides/faked-ino.yaml b/qa/suites/fs/32bits/overrides/faked-ino.yaml new file mode 100644 index 000000000..102df684d --- /dev/null +++ b/qa/suites/fs/32bits/overrides/faked-ino.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + client use faked inos: true diff --git a/qa/suites/fs/32bits/overrides/ignorelist_health.yaml b/qa/suites/fs/32bits/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/32bits/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/32bits/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/32bits/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/tasks/.qa b/qa/suites/fs/32bits/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..f7784383b --- /dev/null +++ b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/bugs/.qa b/qa/suites/fs/bugs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/% b/qa/suites/fs/bugs/client_trim_caps/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/% diff --git a/qa/suites/fs/bugs/client_trim_caps/.qa b/qa/suites/fs/bugs/client_trim_caps/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/begin b/qa/suites/fs/bugs/client_trim_caps/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/centos_latest.yaml b/qa/suites/fs/bugs/client_trim_caps/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/clusters/.qa b/qa/suites/fs/bugs/client_trim_caps/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml b/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml new file mode 100644 index 000000000..5cd97a3ae --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, mds.a, mds.b, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +- machine: + disk: 100 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/qa/suites/fs/bugs/client_trim_caps/conf b/qa/suites/fs/bugs/client_trim_caps/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/objectstore/.qa b/qa/suites/fs/bugs/client_trim_caps/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/+ b/qa/suites/fs/bugs/client_trim_caps/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/+ diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/.qa b/qa/suites/fs/bugs/client_trim_caps/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_health.yaml b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/no_client_pidfile.yaml b/qa/suites/fs/bugs/client_trim_caps/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/tasks/.qa b/qa/suites/fs/bugs/client_trim_caps/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i24137.yaml b/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i24137.yaml new file mode 100644 index 000000000..098f7d053 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i24137.yaml @@ -0,0 +1,14 @@ +# Note this test is unlikely to exercise the code as expected in the future: +# "It's too tricky to arrange inodes in session->caps. we don't know if it +# still works in the future." -Zheng + +tasks: +- exec: + mon.a: + - ceph config set mds mds_min_caps_per_client 1 +- background_exec: + mon.a: + - "sleep 30 && ceph config set mds mds_max_caps_per_client 1" +- exec: + client.0: + - ceph_test_trim_caps diff --git a/qa/suites/fs/cephadm/.qa b/qa/suites/fs/cephadm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/.qa @@ -0,0 +1 @@ +../.qa/
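The trim-i24137.yaml fragment above stages a race on purpose: mds_min_caps_per_client is dropped to 1 up front, and then, assuming teuthology's background_exec runs its commands concurrently with the tasks that follow, the per-client cap limit is cut to 1 about 30 seconds into the ceph_test_trim_caps run so the MDS has to recall nearly every capability it handed out. A commented copy of the task list:

    tasks:
    - exec:
        mon.a:
          # let the MDS trim clients all the way down to a single cap
          - ceph config set mds mds_min_caps_per_client 1
    - background_exec:
        mon.a:
          # ~30s into the workload, cap each client at one cap so the MDS
          # must recall (trim) almost everything it already granted
          - "sleep 30 && ceph config set mds mds_max_caps_per_client 1"
    - exec:
        client.0:
          - ceph_test_trim_caps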
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/% b/qa/suites/fs/cephadm/multivolume/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/% diff --git a/qa/suites/fs/cephadm/multivolume/.qa b/qa/suites/fs/cephadm/multivolume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/0-start.yaml b/qa/suites/fs/cephadm/multivolume/0-start.yaml new file mode 100644 index 000000000..13dc863d2 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/0-start.yaml @@ -0,0 +1,39 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - host.b + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true +tasks: +- install: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- cephadm.shell: + host.a: + - ceph fs volume create foo + - ceph fs volume create bar +- fs.ready: + timeout: 300 diff --git a/qa/suites/fs/cephadm/multivolume/1-mount.yaml b/qa/suites/fs/cephadm/multivolume/1-mount.yaml new file mode 100644 index 000000000..713621972 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/1-mount.yaml @@ -0,0 +1,7 @@ +tasks: + - ceph-fuse: + client.0: + cephfs_name: foo + - ceph-fuse: + client.1: + cephfs_name: bar diff --git a/qa/suites/fs/cephadm/multivolume/2-workload/.qa b/qa/suites/fs/cephadm/multivolume/2-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/2-workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/2-workload/dbench.yaml b/qa/suites/fs/cephadm/multivolume/2-workload/dbench.yaml new file mode 120000 index 000000000..9fb8adcea --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/2-workload/dbench.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/distro/.qa b/qa/suites/fs/cephadm/multivolume/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/distro/single-container-host.yaml b/qa/suites/fs/cephadm/multivolume/distro/single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/distro/single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/% b/qa/suites/fs/cephadm/renamevolume/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/% diff --git a/qa/suites/fs/cephadm/renamevolume/.qa b/qa/suites/fs/cephadm/renamevolume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/0-start.yaml b/qa/suites/fs/cephadm/renamevolume/0-start.yaml new file mode 100644 index 000000000..727837580 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/0-start.yaml @@ -0,0 +1,38 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - host.b + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true +tasks: +- install: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- cephadm.shell: + host.a: + - ceph fs volume create foo +- fs.ready: + timeout: 300 diff --git a/qa/suites/fs/cephadm/renamevolume/1-rename.yaml b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml new file mode 100644 index 000000000..7f9bc8906 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml @@ -0,0 +1,11 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs volume rename foo bar --yes-i-really-mean-it +- fs.ready: + timeout: 300 +- cephadm.shell: + host.a: + - | + set -ex + ceph orch ls mds --format=json | jq ".[] | .service_name" | grep "mds.bar" diff --git a/qa/suites/fs/cephadm/renamevolume/distro/.qa b/qa/suites/fs/cephadm/renamevolume/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/distro/.qa @@ -0,0 +1 @@ +../.qa/
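The final cephadm.shell step in renamevolume/1-rename.yaml above carries the actual assertion of the test: after renaming the volume from foo to bar and waiting for fs.ready, it lists the orchestrator-managed MDS services as JSON and greps for the new service name. A commented copy of that stanza; since the script runs under set -ex, a missing mds.bar service makes grep fail and the job with it:

    tasks:
    - cephadm.shell:
        host.a:
          - |
            # the orchestrator should now manage an MDS service named
            # after the renamed volume, i.e. mds.bar rather than mds.foo
            set -ex
            ceph orch ls mds --format=json | jq ".[] | .service_name" | grep "mds.bar"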
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/distro/single-container-host.yaml b/qa/suites/fs/cephadm/renamevolume/distro/single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/distro/single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/overrides/.qa b/qa/suites/fs/cephadm/renamevolume/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/overrides/ignorelist_health.yaml b/qa/suites/fs/cephadm/renamevolume/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/% b/qa/suites/fs/fscrypt/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/% diff --git a/qa/suites/fs/fscrypt/.qa b/qa/suites/fs/fscrypt/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/begin b/qa/suites/fs/fscrypt/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/fscrypt/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/bluestore-bitmap.yaml b/qa/suites/fs/fscrypt/bluestore-bitmap.yaml new file mode 120000 index 000000000..fb603bc9a --- /dev/null +++ b/qa/suites/fs/fscrypt/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/clusters/.qa b/qa/suites/fs/fscrypt/clusters/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/fs/fscrypt/clusters/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/clusters/1-mds-1-client.yaml b/qa/suites/fs/fscrypt/clusters/1-mds-1-client.yaml new file mode 120000 index 000000000..64bdb79fe --- /dev/null +++ b/qa/suites/fs/fscrypt/clusters/1-mds-1-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/conf b/qa/suites/fs/fscrypt/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/fscrypt/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/distro/$ b/qa/suites/fs/fscrypt/distro/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/$ diff --git a/qa/suites/fs/fscrypt/distro/.qa b/qa/suites/fs/fscrypt/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/distro/centos_latest.yaml b/qa/suites/fs/fscrypt/distro/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/distro/ubuntu_latest.yaml b/qa/suites/fs/fscrypt/distro/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/.qa b/qa/suites/fs/fscrypt/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/% b/qa/suites/fs/fscrypt/mount/kclient/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/% diff --git a/qa/suites/fs/fscrypt/mount/kclient/.qa b/qa/suites/fs/fscrypt/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/.qa b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/v1.yaml b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/v1.yaml new file mode 120000 index 000000000..3315775c1 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/v1.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/mount-syntax/v1.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/mount.yaml b/qa/suites/fs/fscrypt/mount/kclient/mount.yaml new file mode 120000 index 000000000..9967f23e2 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/mount.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/mount.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/% b/qa/suites/fs/fscrypt/mount/kclient/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/% diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/.qa b/qa/suites/fs/fscrypt/mount/kclient/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/.qa b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/.qa b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/k-testing.yaml b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/k-testing.yaml new file mode 120000 index 000000000..bec80be29 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/k-testing.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/+ b/qa/suites/fs/fscrypt/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/+ diff --git a/qa/suites/fs/fscrypt/overrides/.qa b/qa/suites/fs/fscrypt/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/ignorelist_health.yaml b/qa/suites/fs/fscrypt/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/ignorelist_health_more.yaml b/qa/suites/fs/fscrypt/overrides/ignorelist_health_more.yaml new file mode 100644 index 000000000..948352527 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/ignorelist_health_more.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + log-ignorelist: + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/fscrypt/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/fscrypt/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/pg-warn.yaml b/qa/suites/fs/fscrypt/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/fscrypt/tasks/.qa b/qa/suites/fs/fscrypt/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-common.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-common.yaml new file mode 100644 index 000000000..5cb34d981 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-common.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_fscrypt diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-dbench.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-dbench.yaml new file mode 100644 index 000000000..324bc64c3 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-dbench.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none dbench + - fs/fscrypt.sh unlocked dbench diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-ffsb.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-ffsb.yaml new file mode 100644 index 000000000..0c70fe963 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-ffsb.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none ffsb + - fs/fscrypt.sh unlocked ffsb diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-iozone.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-iozone.yaml new file mode 100644 index 000000000..5e9bb5aa3 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-iozone.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none iozone + - fs/fscrypt.sh unlocked iozone diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-pjd.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-pjd.yaml new file mode 100644 index 000000000..6d8f709bd --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-pjd.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none pjd + - fs/fscrypt.sh unlocked pjd diff --git a/qa/suites/fs/full/% b/qa/suites/fs/full/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/full/% diff --git a/qa/suites/fs/full/.qa b/qa/suites/fs/full/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/.qa @@ -0,0 +1 @@ +../.qa/
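Each of the fscrypt-*.yaml task fragments above calls the fs/fscrypt.sh workunit twice with the same workload but a different first argument. Reading the invocations, the first argument looks like an encryption state (none, then unlocked) and the second the workload to drive (dbench, ffsb, iozone or pjd); that reading is an assumption from the argument names, not something spelled out in this diff. A commented copy of the dbench variant:

    tasks:
    - workunit:
        timeout: 6h
        clients:
          client.0:
            # fs/fscrypt.sh <mode> <workload>: run dbench once without
            # encryption ("none") and once against an fscrypt-protected
            # directory whose key is loaded ("unlocked")
            - fs/fscrypt.sh none dbench
            - fs/fscrypt.sh unlocked dbench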
\ No newline at end of file diff --git a/qa/suites/fs/full/begin b/qa/suites/fs/full/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/full/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/full/clusters/.qa b/qa/suites/fs/full/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml new file mode 120000 index 000000000..517b76547 --- /dev/null +++ b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-node-1-mds-1-osd.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/conf b/qa/suites/fs/full/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/full/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/full/distro b/qa/suites/fs/full/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/full/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/full/mount/.qa b/qa/suites/fs/full/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/mount/fuse.yaml b/qa/suites/fs/full/mount/fuse.yaml new file mode 100644 index 000000000..8338cc493 --- /dev/null +++ b/qa/suites/fs/full/mount/fuse.yaml @@ -0,0 +1,2 @@ +tasks: + - ceph-fuse: diff --git a/qa/suites/fs/full/objectstore/.qa b/qa/suites/fs/full/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides.yaml b/qa/suites/fs/full/overrides.yaml new file mode 100644 index 000000000..921528d66 --- /dev/null +++ b/qa/suites/fs/full/overrides.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + mgr: + debug client: 20 + log-ignorelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_NEARFULL + - POOL_BACKFILLFULL + - PG_DEGRADED + - OSD_OUT_OF_ORDER_FULL + - OSD_NEARFULL + - OSD_FULL diff --git a/qa/suites/fs/full/overrides/+ b/qa/suites/fs/full/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/full/overrides/+ diff --git a/qa/suites/fs/full/overrides/.qa b/qa/suites/fs/full/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides/ignorelist_health.yaml b/qa/suites/fs/full/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/full/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/full/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/full/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides/no_client_pidfile.yaml b/qa/suites/fs/full/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/full/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/tasks/.qa b/qa/suites/fs/full/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/tasks/mgr-osd-full.yaml b/qa/suites/fs/full/tasks/mgr-osd-full.yaml new file mode 100644 index 000000000..b4f673e39 --- /dev/null +++ b/qa/suites/fs/full/tasks/mgr-osd-full.yaml @@ -0,0 +1,31 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 1 + osd_pool_default_min_size: 1 + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 + osd: # force bluestore since it's required for ec overwrites + osd objectstore: bluestore + bluestore block size: 1073741824 +tasks: +- workunit: + cleanup: true + clients: + client.0: + - fs/full/subvolume_rm.sh +- workunit: + cleanup: true + clients: + client.0: + - fs/full/subvolume_clone.sh +- workunit: + cleanup: true + clients: + client.0: + - fs/full/subvolume_snapshot_rm.sh diff --git a/qa/suites/fs/functional/% b/qa/suites/fs/functional/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/functional/% diff --git a/qa/suites/fs/functional/.qa b/qa/suites/fs/functional/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/.qa @@ -0,0 +1 @@ +../.qa/
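tasks/mgr-osd-full.yaml above deliberately builds a cluster that is easy to fill: single-copy pools (size and min_size of 1) on a bluestore OSD whose backing device is pinned to 1073741824 bytes, i.e. exactly 1 GiB, so the subvolume_rm, subvolume_clone and subvolume_snapshot_rm workunits in the same fragment can drive it to full quickly. A commented excerpt of the relevant overrides:

    overrides:
      ceph:
        conf:
          global:
            # unreplicated pools: one copy of everything on the single OSD
            osd_pool_default_size: 1
            osd_pool_default_min_size: 1
          osd:
            osd objectstore: bluestore
            # 1073741824 bytes = 1 GiB per OSD, small enough for the
            # full-space workunits to hit the full thresholds fast
            bluestore block size: 1073741824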
\ No newline at end of file diff --git a/qa/suites/fs/functional/begin b/qa/suites/fs/functional/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/functional/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/functional/clusters/.qa b/qa/suites/fs/functional/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/clusters/1a3s-mds-4c-client.yaml b/qa/suites/fs/functional/clusters/1a3s-mds-4c-client.yaml new file mode 120000 index 000000000..5c722a30b --- /dev/null +++ b/qa/suites/fs/functional/clusters/1a3s-mds-4c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-4c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/conf b/qa/suites/fs/functional/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/functional/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/functional/distro b/qa/suites/fs/functional/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/functional/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/functional/mount b/qa/suites/fs/functional/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/functional/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/functional/objectstore/.qa b/qa/suites/fs/functional/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/functional/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/functional/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/objectstore/bluestore-ec-root.yaml b/qa/suites/fs/functional/objectstore/bluestore-ec-root.yaml new file mode 120000 index 000000000..4edebd682 --- /dev/null +++ b/qa/suites/fs/functional/objectstore/bluestore-ec-root.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-ec-root.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/+ b/qa/suites/fs/functional/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/functional/overrides/+ diff --git a/qa/suites/fs/functional/overrides/.qa b/qa/suites/fs/functional/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/ignorelist_health.yaml b/qa/suites/fs/functional/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/functional/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/functional/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/functional/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/no_client_pidfile.yaml b/qa/suites/fs/functional/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/functional/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/.qa b/qa/suites/fs/functional/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/acls.yaml b/qa/suites/fs/functional/tasks/acls.yaml new file mode 100644 index 000000000..c44f6e00d --- /dev/null +++ b/qa/suites/fs/functional/tasks/acls.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - Reduced data availability + - Degraded data redundancy +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_acls diff --git a/qa/suites/fs/functional/tasks/admin.yaml b/qa/suites/fs/functional/tasks/admin.yaml new file mode 100644 index 000000000..97ecc4cf4 --- /dev/null +++ b/qa/suites/fs/functional/tasks/admin.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + lockdep: true + log-ignorelist: + - missing required features +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_admin diff --git a/qa/suites/fs/functional/tasks/alternate-pool.yaml b/qa/suites/fs/functional/tasks/alternate-pool.yaml new file mode 100644 index 000000000..7a1caa473 --- /dev/null +++ b/qa/suites/fs/functional/tasks/alternate-pool.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace + - object missing on disk + - error reading table object + - error reading sessionmap + - unmatched fragstat + - unmatched rstat + - was unreadable, recreating it now + - Scrub error on inode + - Metadata damage detected + - MDS_FAILED + - MDS_DAMAGE + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_recovery_pool diff --git a/qa/suites/fs/functional/tasks/asok_dump_tree.yaml b/qa/suites/fs/functional/tasks/asok_dump_tree.yaml new file mode 100644 index 000000000..7fa561470 --- /dev/null +++ b/qa/suites/fs/functional/tasks/asok_dump_tree.yaml @@ -0,0 +1,4 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_dump_tree diff --git a/qa/suites/fs/functional/tasks/auto-repair.yaml b/qa/suites/fs/functional/tasks/auto-repair.yaml new file mode 100644 index 000000000..1a3ff3efb --- /dev/null +++ b/qa/suites/fs/functional/tasks/auto-repair.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - force file system read-only + - bad backtrace + - MDS in read-only mode + - \(MDS_READ_ONLY\) +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_auto_repair diff --git a/qa/suites/fs/functional/tasks/backtrace.yaml b/qa/suites/fs/functional/tasks/backtrace.yaml new file mode 100644 index 000000000..6d80c32cc --- /dev/null +++ b/qa/suites/fs/functional/tasks/backtrace.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_backtrace diff --git a/qa/suites/fs/functional/tasks/cap-flush.yaml b/qa/suites/fs/functional/tasks/cap-flush.yaml new file mode 100644 index 000000000..f063654a9 --- /dev/null +++ b/qa/suites/fs/functional/tasks/cap-flush.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds.a +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_cap_flush diff --git a/qa/suites/fs/functional/tasks/client-limits.yaml b/qa/suites/fs/functional/tasks/client-limits.yaml new file mode 100644 index 000000000..b6b3c9858 --- /dev/null +++ b/qa/suites/fs/functional/tasks/client-limits.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - responding to mclientcaps\(revoke\) + - not advance its oldest_client_tid + - failing to advance its oldest client/flush tid + - Too many inodes in cache + - failing to respond to cache pressure + - slow requests are blocked + - failing to respond to capability release + - MDS cache is too large + - 
\(MDS_CLIENT_OLDEST_TID\) + - \(MDS_CACHE_OVERSIZED\) +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_client_limits diff --git a/qa/suites/fs/functional/tasks/client-readahead.yaml b/qa/suites/fs/functional/tasks/client-readahead.yaml new file mode 100644 index 000000000..7c5c850e6 --- /dev/null +++ b/qa/suites/fs/functional/tasks/client-readahead.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_readahead diff --git a/qa/suites/fs/functional/tasks/client-recovery.yaml b/qa/suites/fs/functional/tasks/client-recovery.yaml new file mode 100644 index 000000000..e67acc3ab --- /dev/null +++ b/qa/suites/fs/functional/tasks/client-recovery.yaml @@ -0,0 +1,16 @@ +# The task interferes with the network, so we need +# to permit OSDs to complain about that. +overrides: + ceph: + log-ignorelist: + - evicting unresponsive client + - but it is still running + - slow request + - MDS_CLIENT_LATE_RELEASE + - t responding to mclientcaps + - file system flag refuse_client_session is set +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_client_recovery diff --git a/qa/suites/fs/functional/tasks/damage.yaml b/qa/suites/fs/functional/tasks/damage.yaml new file mode 100644 index 000000000..7703aee93 --- /dev/null +++ b/qa/suites/fs/functional/tasks/damage.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace + - object missing on disk + - error reading table object + - error reading sessionmap + - Error loading MDS rank + - missing journal object + - Error recovering journal + - error decoding table object + - failed to read JournalPointer + - Corrupt directory entry + - Corrupt fnode header + - corrupt sessionmap header + - Corrupt dentry + - Scrub error on inode + - Metadata damage detected + - MDS_READ_ONLY + - force file system read-only + - with standby daemon mds + - MDS abort because newly corrupt dentry +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_damage + diff --git a/qa/suites/fs/functional/tasks/data-scan.yaml b/qa/suites/fs/functional/tasks/data-scan.yaml new file mode 100644 index 000000000..b704a0036 --- /dev/null +++ b/qa/suites/fs/functional/tasks/data-scan.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace + - object missing on disk + - error reading table object + - error reading sessionmap + - unmatched fragstat + - unmatched rstat + - was unreadable, recreating it now + - Scrub error on inode + - Metadata damage detected + - inconsistent rstat on inode + - Error recovering journal +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_data_scan diff --git a/qa/suites/fs/functional/tasks/exports.yaml b/qa/suites/fs/functional/tasks/exports.yaml new file mode 100644 index 000000000..76819fee9 --- /dev/null +++ b/qa/suites/fs/functional/tasks/exports.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds +tasks: +- cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_exports diff --git a/qa/suites/fs/functional/tasks/forward-scrub.yaml b/qa/suites/fs/functional/tasks/forward-scrub.yaml new file mode 100644 index 000000000..961d25db0 --- /dev/null +++ b/qa/suites/fs/functional/tasks/forward-scrub.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - inode wrongly marked free + - bad backtrace on inode + - inode table repaired for inode + - Scrub error on inode + - Scrub error on dir + - Metadata damage detected +tasks: + - 
cephfs_test_runner: + modules: + - tasks.cephfs.test_forward_scrub diff --git a/qa/suites/fs/functional/tasks/fragment.yaml b/qa/suites/fs/functional/tasks/fragment.yaml new file mode 100644 index 000000000..482caad85 --- /dev/null +++ b/qa/suites/fs/functional/tasks/fragment.yaml @@ -0,0 +1,5 @@ + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_fragment diff --git a/qa/suites/fs/functional/tasks/journal-repair.yaml b/qa/suites/fs/functional/tasks/journal-repair.yaml new file mode 100644 index 000000000..0dae6143c --- /dev/null +++ b/qa/suites/fs/functional/tasks/journal-repair.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace on directory inode + - error reading table object + - Metadata damage detected + - slow requests are blocked + - Behind on trimming + - error reading sessionmap +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_journal_repair diff --git a/qa/suites/fs/functional/tasks/mds-flush.yaml b/qa/suites/fs/functional/tasks/mds-flush.yaml new file mode 100644 index 000000000..be8c4bee6 --- /dev/null +++ b/qa/suites/fs/functional/tasks/mds-flush.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_flush diff --git a/qa/suites/fs/functional/tasks/mds-full.yaml b/qa/suites/fs/functional/tasks/mds-full.yaml new file mode 100644 index 000000000..9399890c4 --- /dev/null +++ b/qa/suites/fs/functional/tasks/mds-full.yaml @@ -0,0 +1,38 @@ +overrides: + ceph: + cephfs: + ec_profile: + - disabled + log-ignorelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL + - PG_RECOVERY_FULL + - PG_DEGRADED + conf: + mon: + mon osd nearfull ratio: 0.6 + mon osd backfillfull ratio: 0.6 + mon osd full ratio: 0.7 + osd: + osd mon report interval: 5 + osd objectstore: memstore + osd failsafe full ratio: 1.0 + memstore device bytes: 200000000 + client.0: + debug client: 20 + debug objecter: 20 + debug objectcacher: 20 + client.1: + debug client: 20 + debug objecter: 20 + debug objectcacher: 20 +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_full diff --git a/qa/suites/fs/functional/tasks/mds_creation_retry.yaml b/qa/suites/fs/functional/tasks/mds_creation_retry.yaml new file mode 100644 index 000000000..fd23aa8ba --- /dev/null +++ b/qa/suites/fs/functional/tasks/mds_creation_retry.yaml @@ -0,0 +1,6 @@ +tasks: +-mds_creation_failure: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] + diff --git a/qa/suites/fs/functional/tasks/metrics.yaml b/qa/suites/fs/functional/tasks/metrics.yaml new file mode 100644 index 000000000..7e5ac4150 --- /dev/null +++ b/qa/suites/fs/functional/tasks/metrics.yaml @@ -0,0 +1,5 @@ +tasks: +- cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_mds_metrics diff --git a/qa/suites/fs/functional/tasks/multimds_misc.yaml b/qa/suites/fs/functional/tasks/multimds_misc.yaml new file mode 100644 index 000000000..8cdf90310 --- /dev/null +++ b/qa/suites/fs/functional/tasks/multimds_misc.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - Scrub error on inode + +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_multimds_misc diff --git a/qa/suites/fs/functional/tasks/openfiletable.yaml b/qa/suites/fs/functional/tasks/openfiletable.yaml new file mode 100644 index 000000000..20cfa4f37 --- /dev/null +++ b/qa/suites/fs/functional/tasks/openfiletable.yaml @@ -0,0 +1,4 
@@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_openfiletable diff --git a/qa/suites/fs/functional/tasks/pool-perm.yaml b/qa/suites/fs/functional/tasks/pool-perm.yaml new file mode 100644 index 000000000..41392fe51 --- /dev/null +++ b/qa/suites/fs/functional/tasks/pool-perm.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_pool_perm diff --git a/qa/suites/fs/functional/tasks/quota.yaml b/qa/suites/fs/functional/tasks/quota.yaml new file mode 100644 index 000000000..d44021e48 --- /dev/null +++ b/qa/suites/fs/functional/tasks/quota.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_quota diff --git a/qa/suites/fs/functional/tasks/recovery-fs.yaml b/qa/suites/fs/functional/tasks/recovery-fs.yaml new file mode 100644 index 000000000..d354e9fbe --- /dev/null +++ b/qa/suites/fs/functional/tasks/recovery-fs.yaml @@ -0,0 +1,4 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_recovery_fs diff --git a/qa/suites/fs/functional/tasks/scrub.yaml b/qa/suites/fs/functional/tasks/scrub.yaml new file mode 100644 index 000000000..09e666849 --- /dev/null +++ b/qa/suites/fs/functional/tasks/scrub.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds + - Scrub error on inode + - Behind on trimming + - Metadata damage detected + - bad backtrace on inode + - overall HEALTH_ + - \(MDS_TRIM\) + conf: + mds: + mds log max segments: 1 + mds cache max size: 1000 +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_scrub_checks + - tasks.cephfs.test_scrub diff --git a/qa/suites/fs/functional/tasks/sessionmap.yaml b/qa/suites/fs/functional/tasks/sessionmap.yaml new file mode 100644 index 000000000..c12632e77 --- /dev/null +++ b/qa/suites/fs/functional/tasks/sessionmap.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - client session with non-allowable root +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_sessionmap diff --git a/qa/suites/fs/functional/tasks/snap-schedule.yaml b/qa/suites/fs/functional/tasks/snap-schedule.yaml new file mode 100644 index 000000000..f2e62b050 --- /dev/null +++ b/qa/suites/fs/functional/tasks/snap-schedule.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + mgr: + debug mgr: 20 + debug ms: 1 + debug finisher: 20 + debug client: 20 + log-whitelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_snap_schedules diff --git a/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml new file mode 100644 index 000000000..7bbcf000f --- /dev/null +++ b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + conf: + mgr: + debug mgr: 20 + debug ms: 1 + debug finisher: 20 + debug client: 20 + log-whitelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL + +overrides: + kclient: + snapdirname: .customsnapkernel + ceph: + conf: + client: + client snapdir: .customsnapfuse + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_snap_schedules.TestSnapSchedulesSnapdir diff --git 
a/qa/suites/fs/functional/tasks/snapshots.yaml b/qa/suites/fs/functional/tasks/snapshots.yaml new file mode 100644 index 000000000..d5951468b --- /dev/null +++ b/qa/suites/fs/functional/tasks/snapshots.yaml @@ -0,0 +1,16 @@ +overrides: + check-counter: + dry_run: true + ceph: + log-ignorelist: + - evicting unresponsive client + - RECENT_CRASH + +tasks: +- exec: + mon.a: + - ceph config set mgr mgr/crash/warn_recent_interval 0 +- cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_snapshots diff --git a/qa/suites/fs/functional/tasks/strays.yaml b/qa/suites/fs/functional/tasks/strays.yaml new file mode 100644 index 000000000..3866ce510 --- /dev/null +++ b/qa/suites/fs/functional/tasks/strays.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_strays diff --git a/qa/suites/fs/functional/tasks/test_journal_migration.yaml b/qa/suites/fs/functional/tasks/test_journal_migration.yaml new file mode 100644 index 000000000..f5c4dbc70 --- /dev/null +++ b/qa/suites/fs/functional/tasks/test_journal_migration.yaml @@ -0,0 +1,4 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_journal_migration diff --git a/qa/suites/fs/functional/tasks/truncate_delay.yaml b/qa/suites/fs/functional/tasks/truncate_delay.yaml new file mode 100644 index 000000000..acd1a5a04 --- /dev/null +++ b/qa/suites/fs/functional/tasks/truncate_delay.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + ms_inject_delay_probability: 1 + ms_inject_delay_type: osd + ms_inject_delay_max: 5 + client_oc_max_dirty_age: 1 +tasks: +- exec: + client.0: + - cd $TESTDIR/mnt.0 && dd if=/dev/zero of=./foo count=100 + - sleep 2 + - cd $TESTDIR/mnt.0 && truncate --size 0 ./foo diff --git a/qa/suites/fs/functional/tasks/workunit/.qa b/qa/suites/fs/functional/tasks/workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/tasks/workunit/.qa @@ -0,0 +1 @@ +../.qa/
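The log-ignorelist (and older log-whitelist) entries in the task fragments above are regular expressions that teuthology matches against cluster log lines, which is why health codes are written with escaped parentheses such as \(MDS_TRIM\). A minimal sketch of that matching in Python, illustrative only and not the actual teuthology implementation:

    import re

    # a few entries copied from the scrub.yaml fragment above
    ignorelist = [
        r"Scrub error on inode",
        r"Behind on trimming",
        r"\(MDS_TRIM\)",
    ]

    def is_ignored(log_line):
        # a warning/error line is tolerated if any ignorelist pattern matches it
        return any(re.search(pattern, log_line) for pattern in ignorelist)

    print(is_ignored("overall HEALTH_WARN 1 MDSs behind on trimming (MDS_TRIM)"))  # True
    print(is_ignored("OSD_DOWN: 1 osds down"))                                     # False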
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/workunit/dir-max-entries.yaml b/qa/suites/fs/functional/tasks/workunit/dir-max-entries.yaml new file mode 100644 index 000000000..087dcc3d5 --- /dev/null +++ b/qa/suites/fs/functional/tasks/workunit/dir-max-entries.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - fs/maxentries diff --git a/qa/suites/fs/functional/tasks/workunit/quota.yaml b/qa/suites/fs/functional/tasks/workunit/quota.yaml new file mode 100644 index 000000000..7ac8714c5 --- /dev/null +++ b/qa/suites/fs/functional/tasks/workunit/quota.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - fs/quota diff --git a/qa/suites/fs/libcephfs/% b/qa/suites/fs/libcephfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/libcephfs/% diff --git a/qa/suites/fs/libcephfs/.qa b/qa/suites/fs/libcephfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/begin b/qa/suites/fs/libcephfs/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/libcephfs/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/clusters/.qa b/qa/suites/fs/libcephfs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/clusters/1-mds-1-client-coloc.yaml b/qa/suites/fs/libcephfs/clusters/1-mds-1-client-coloc.yaml new file mode 120000 index 000000000..d15ecfda0 --- /dev/null +++ b/qa/suites/fs/libcephfs/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client-coloc.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/conf b/qa/suites/fs/libcephfs/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/libcephfs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/distro b/qa/suites/fs/libcephfs/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/libcephfs/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/objectstore/.qa b/qa/suites/fs/libcephfs/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/libcephfs/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/libcephfs/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/+ b/qa/suites/fs/libcephfs/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/+ diff --git a/qa/suites/fs/libcephfs/overrides/.qa b/qa/suites/fs/libcephfs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/ignorelist_health.yaml b/qa/suites/fs/libcephfs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/libcephfs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/no_client_pidfile.yaml b/qa/suites/fs/libcephfs/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/.qa b/qa/suites/fs/libcephfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/client.yaml b/qa/suites/fs/libcephfs/tasks/client.yaml new file mode 100644 index 000000000..bfdfee4a8 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/client.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- workunit: + clients: + client.0: + - client/test.sh diff --git a/qa/suites/fs/libcephfs/tasks/ino_release_cb.yaml b/qa/suites/fs/libcephfs/tasks/ino_release_cb.yaml new file mode 100644 index 000000000..5b5247489 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/ino_release_cb.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- exec: + mon.a: + - ceph config set mds mds_min_caps_per_client 1 +- background_exec: + mon.a: + - "sleep 30 && ceph config set mds mds_max_caps_per_client 1" +- exec: + client.0: + - ceph_test_ino_release_cb diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/+ b/qa/suites/fs/libcephfs/tasks/libcephfs/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/+ diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/.qa b/qa/suites/fs/libcephfs/tasks/libcephfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/frag.yaml b/qa/suites/fs/libcephfs/tasks/libcephfs/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/test.yaml b/qa/suites/fs/libcephfs/tasks/libcephfs/test.yaml new file mode 100644 index 000000000..70afa2da3 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/test.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs_python.yaml b/qa/suites/fs/libcephfs/tasks/libcephfs_python.yaml new file mode 100644 index 000000000..68ccd579c --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs_python.yaml @@ -0,0 +1,17 @@ +overrides: + ceph-fuse: + disabled: true + kclient: + disabled: true + install: + ceph: + extra_system_packages: + deb: + - python3-pytest + rpm: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - fs/test_python.sh diff --git a/qa/suites/fs/mirror-ha/% b/qa/suites/fs/mirror-ha/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/% diff --git a/qa/suites/fs/mirror-ha/.qa b/qa/suites/fs/mirror-ha/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/.qa @@ -0,0 +1 @@ +../.qa/
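The libcephfs test.yaml fragment above uses check-counter to require that the mds.dir_split perf counter is bumped during the run. A hand-rolled sketch of that kind of assertion against the MDS admin socket, shown only for illustration (the real check is performed by teuthology's check-counter task, and the daemon name mds.a is an assumption):

    import json
    import subprocess

    # query the MDS perf counters over the admin socket (daemon name assumed to be mds.a)
    out = subprocess.check_output(["sudo", "ceph", "daemon", "mds.a", "perf", "dump"])
    counters = json.loads(out)

    # "mds.dir_split" in the yaml refers to the dir_split counter in the mds section
    assert counters["mds"]["dir_split"] > 0, "expected at least one directory split"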
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/begin b/qa/suites/fs/mirror-ha/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/mirror-ha/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/+ b/qa/suites/fs/mirror-ha/cephfs-mirror/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/+ diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/.qa b/qa/suites/fs/mirror-ha/cephfs-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/1-volume-create-rm.yaml b/qa/suites/fs/mirror-ha/cephfs-mirror/1-volume-create-rm.yaml new file mode 100644 index 000000000..4ee16e1c9 --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/1-volume-create-rm.yaml @@ -0,0 +1,14 @@ +meta: +- desc: create/rm volumes and set configs + +tasks: + - exec: + mon.a: + - "ceph fs volume create dc" + - "ceph fs volume create dc-backup" + - full_sequential_finally: + - exec: + mon.a: + - ceph config set mon mon_allow_pool_delete true + - ceph fs volume rm dc --yes-i-really-mean-it + - ceph fs volume rm dc-backup --yes-i-really-mean-it diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/2-three-per-cluster.yaml b/qa/suites/fs/mirror-ha/cephfs-mirror/2-three-per-cluster.yaml new file mode 100644 index 000000000..095f0893a --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/2-three-per-cluster.yaml @@ -0,0 +1,12 @@ +meta: +- desc: run one cephfs-mirror daemon on primary cluster +tasks: +- cephfs-mirror: + client: client.mirror1 + run_in_foreground: True +- cephfs-mirror: + client: client.mirror2 + run_in_foreground: True +- cephfs-mirror: + client: client.mirror3 + run_in_foreground: True diff --git a/qa/suites/fs/mirror-ha/clients/+ b/qa/suites/fs/mirror-ha/clients/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/clients/+ diff --git a/qa/suites/fs/mirror-ha/clients/.qa b/qa/suites/fs/mirror-ha/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/clients/mirror.yaml b/qa/suites/fs/mirror-ha/clients/mirror.yaml new file mode 100644 index 000000000..620c821e1 --- /dev/null +++ b/qa/suites/fs/mirror-ha/clients/mirror.yaml @@ -0,0 +1,32 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + debug cephfs_mirror: 20 + log to stderr: false + # make these predictable + client.mirror1: + admin socket: /var/run/ceph/cephfs-mirror1.asok + pid file: /var/run/ceph/cephfs-mirror1.pid + client.mirror2: + admin socket: /var/run/ceph/cephfs-mirror2.asok + pid file: /var/run/ceph/cephfs-mirror2.pid + client.mirror3: + admin socket: /var/run/ceph/cephfs-mirror3.asok + pid file: /var/run/ceph/cephfs-mirror3.pid +tasks: +- exec: + client.mirror1: + - "sudo ceph auth caps client.mirror1 mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror2: + - "sudo ceph auth caps client.mirror2 mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror3: + - "sudo ceph auth caps client.mirror3 mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror_remote: + - "sudo ceph auth caps client.mirror_remote mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" + client.1: + - "sudo ceph auth caps client.0 mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" + client.2: + - "sudo ceph auth caps client.1 mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" diff --git a/qa/suites/fs/mirror-ha/cluster/+ b/qa/suites/fs/mirror-ha/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/cluster/+ diff --git a/qa/suites/fs/mirror-ha/cluster/.qa b/qa/suites/fs/mirror-ha/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/cluster/1-node.yaml b/qa/suites/fs/mirror-ha/cluster/1-node.yaml new file mode 100644 index 000000000..cc70c106d --- /dev/null +++ b/qa/suites/fs/mirror-ha/cluster/1-node.yaml @@ -0,0 +1,20 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 5 mdss +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - mds.c + - mds.d + - mds.e + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 + - client.2 + - client.mirror1 + - client.mirror2 + - client.mirror3 + - client.mirror_remote
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/objectstore/.qa b/qa/suites/fs/mirror-ha/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/mirror-ha/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/mirror-ha/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/overrides/+ b/qa/suites/fs/mirror-ha/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/+ diff --git a/qa/suites/fs/mirror-ha/overrides/.qa b/qa/suites/fs/mirror-ha/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml b/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml new file mode 100644 index 000000000..d40fa4cb8 --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/mirror-ha/supported-random-distro$ b/qa/suites/fs/mirror-ha/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/mirror-ha/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/workloads/.qa b/qa/suites/fs/mirror-ha/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/workloads/cephfs-mirror-ha-workunit.yaml b/qa/suites/fs/mirror-ha/workloads/cephfs-mirror-ha-workunit.yaml new file mode 100644 index 000000000..ce4dddf78 --- /dev/null +++ b/qa/suites/fs/mirror-ha/workloads/cephfs-mirror-ha-workunit.yaml @@ -0,0 +1,33 @@ +meta: +- desc: run the cephfs_mirror_ha.sh workunit to test cephfs-mirror daemon in HA active/active mode + +overrides: + ceph: + conf: + mgr: + debug client: 10 + +tasks: + - ceph-fuse: + client.1: + cephfs_name: dc + client.2: + cephfs_name: dc-backup + - cephfs_mirror_thrash: + randomize: False + max_thrash_delay: 10 + - workunit: + subdir: mirror + cleanup: False + clients: + client.1: [fs/cephfs_mirror_ha_gen.sh] + timeout: 1h + - exec: + client.2: + - "echo verifying synchronized snapshots..." + - workunit: + subdir: mirror + cleanup: False + clients: + client.2: [fs/cephfs_mirror_ha_verify.sh] + timeout: 3h
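Taken together, the mirror-ha fragments create two file systems (dc and dc-backup), run three cephfs-mirror daemons, generate snapshots from client.1 and verify them from client.2. A conceptual sketch of that flow, with hypothetical mount paths and snapshot names; the actual logic lives in the fs/cephfs_mirror_ha_gen.sh and fs/cephfs_mirror_ha_verify.sh workunits referenced above:

    import os
    import time

    primary = "/mnt/dc/dir0"         # hypothetical mount point of fs "dc" on client.1
    backup = "/mnt/dc-backup/dir0"   # hypothetical mount point of fs "dc-backup" on client.2

    # generator side: write some data, then take a CephFS snapshot by creating a .snap subdir
    os.makedirs(primary, exist_ok=True)
    with open(os.path.join(primary, "file"), "wb") as f:
        f.write(os.urandom(1 << 20))
    os.mkdir(os.path.join(primary, ".snap", "snap0"))

    # verifier side: the cephfs-mirror daemons replicate snapshots, so poll until it shows up
    while not os.path.isdir(os.path.join(backup, ".snap", "snap0")):
        time.sleep(10)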
\ No newline at end of file diff --git a/qa/suites/fs/mirror/% b/qa/suites/fs/mirror/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/% diff --git a/qa/suites/fs/mirror/.qa b/qa/suites/fs/mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/begin b/qa/suites/fs/mirror/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/mirror/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/cephfs-mirror/.qa b/qa/suites/fs/mirror/cephfs-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/cephfs-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/cephfs-mirror/one-per-cluster.yaml b/qa/suites/fs/mirror/cephfs-mirror/one-per-cluster.yaml new file mode 100644 index 000000000..c355a9db5 --- /dev/null +++ b/qa/suites/fs/mirror/cephfs-mirror/one-per-cluster.yaml @@ -0,0 +1,5 @@ +meta: +- desc: run one cephfs-mirror daemon on primary cluster +tasks: +- cephfs-mirror: + client: client.mirror diff --git a/qa/suites/fs/mirror/clients/+ b/qa/suites/fs/mirror/clients/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/clients/+ diff --git a/qa/suites/fs/mirror/clients/.qa b/qa/suites/fs/mirror/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/clients/mirror.yaml b/qa/suites/fs/mirror/clients/mirror.yaml new file mode 100644 index 000000000..1a68fea8b --- /dev/null +++ b/qa/suites/fs/mirror/clients/mirror.yaml @@ -0,0 +1,18 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + debug cephfs_mirror: 20 + log to stderr: false + # make these predictable + client.mirror: + admin socket: /var/run/ceph/cephfs-mirror.asok + pid file: /var/run/ceph/cephfs-mirror.pid +tasks: +- exec: + client.mirror: + - "sudo ceph auth caps client.mirror mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror_remote: + - "sudo ceph auth caps client.mirror_remote mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" diff --git a/qa/suites/fs/mirror/cluster/+ b/qa/suites/fs/mirror/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/cluster/+ diff --git a/qa/suites/fs/mirror/cluster/.qa b/qa/suites/fs/mirror/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/cluster/1-node.yaml b/qa/suites/fs/mirror/cluster/1-node.yaml new file mode 100644 index 000000000..a9748e071 --- /dev/null +++ b/qa/suites/fs/mirror/cluster/1-node.yaml @@ -0,0 +1,17 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 5 mdss +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - mds.c + - mds.d + - mds.e + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 + - client.mirror + - client.mirror_remote
\ No newline at end of file diff --git a/qa/suites/fs/mirror/mount/.qa b/qa/suites/fs/mirror/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/mount/fuse.yaml b/qa/suites/fs/mirror/mount/fuse.yaml new file mode 100644 index 000000000..1fdf55ab4 --- /dev/null +++ b/qa/suites/fs/mirror/mount/fuse.yaml @@ -0,0 +1,2 @@ +tasks: + - ceph-fuse: [client.0, client.1] diff --git a/qa/suites/fs/mirror/objectstore/.qa b/qa/suites/fs/mirror/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/mirror/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/mirror/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mirror/overrides/+ b/qa/suites/fs/mirror/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/overrides/+ diff --git a/qa/suites/fs/mirror/overrides/.qa b/qa/suites/fs/mirror/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/mirror/overrides/whitelist_health.yaml new file mode 100644 index 000000000..d40fa4cb8 --- /dev/null +++ b/qa/suites/fs/mirror/overrides/whitelist_health.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/mirror/supported-random-distros$ b/qa/suites/fs/mirror/supported-random-distros$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/mirror/supported-random-distros$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/mirror/tasks/.qa b/qa/suites/fs/mirror/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/tasks/mirror.yaml b/qa/suites/fs/mirror/tasks/mirror.yaml new file mode 100644 index 000000000..07c1e24ef --- /dev/null +++ b/qa/suites/fs/mirror/tasks/mirror.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mgr: + debug client: 10 + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_mirroring.TestMirroring diff --git a/qa/suites/fs/mixed-clients/% b/qa/suites/fs/mixed-clients/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/% diff --git a/qa/suites/fs/mixed-clients/.qa b/qa/suites/fs/mixed-clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/begin b/qa/suites/fs/mixed-clients/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/mixed-clients/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/clusters/.qa b/qa/suites/fs/mixed-clients/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/clusters/1a3s-mds-2c-client.yaml b/qa/suites/fs/mixed-clients/clusters/1a3s-mds-2c-client.yaml new file mode 120000 index 000000000..c190ea92f --- /dev/null +++ b/qa/suites/fs/mixed-clients/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-2c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/conf b/qa/suites/fs/mixed-clients/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/mixed-clients/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/$ b/qa/suites/fs/mixed-clients/distro/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/$ diff --git a/qa/suites/fs/mixed-clients/distro/.qa b/qa/suites/fs/mixed-clients/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/centos_8.yaml b/qa/suites/fs/mixed-clients/distro/centos_8.yaml new file mode 120000 index 000000000..380a1443b --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/centos_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/rhel_8.yaml b/qa/suites/fs/mixed-clients/distro/rhel_8.yaml new file mode 120000 index 000000000..133acf27b --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/+ b/qa/suites/fs/mixed-clients/distro/ubuntu/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/+ diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/.qa b/qa/suites/fs/mixed-clients/distro/ubuntu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/latest.yaml b/qa/suites/fs/mixed-clients/distro/ubuntu/latest.yaml new file mode 120000 index 000000000..0a708b4db --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/latest.yaml @@ -0,0 +1 @@ +.qa/distros/all/ubuntu_22.04.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/overrides.yaml b/qa/suites/fs/mixed-clients/distro/ubuntu/overrides.yaml new file mode 100644 index 000000000..fdd7f5e5a --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/overrides.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + valgrind: + exit_on_first_error: false diff --git a/qa/suites/fs/mixed-clients/kclient-overrides b/qa/suites/fs/mixed-clients/kclient-overrides new file mode 120000 index 000000000..58b04fb24 --- /dev/null +++ b/qa/suites/fs/mixed-clients/kclient-overrides @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/objectstore-ec b/qa/suites/fs/mixed-clients/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/mixed-clients/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/+ b/qa/suites/fs/mixed-clients/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/+ diff --git a/qa/suites/fs/mixed-clients/overrides/.qa b/qa/suites/fs/mixed-clients/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/ignorelist_health.yaml b/qa/suites/fs/mixed-clients/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/mixed-clients/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/osd-asserts.yaml b/qa/suites/fs/mixed-clients/overrides/osd-asserts.yaml new file mode 120000 index 000000000..f290c749b --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/osd-asserts.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/osd-asserts.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/tasks/.qa b/qa/suites/fs/mixed-clients/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml new file mode 100644 index 000000000..78b2d7611 --- /dev/null +++ b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml @@ -0,0 +1,18 @@ +tasks: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/iozone.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml new file mode 100644 index 000000000..d637ff989 --- /dev/null +++ b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml @@ -0,0 +1,18 @@ +tasks: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/blogbench.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - kernel_untar_build.sh diff --git a/qa/suites/fs/multiclient/% b/qa/suites/fs/multiclient/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multiclient/% diff --git a/qa/suites/fs/multiclient/.qa b/qa/suites/fs/multiclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/begin b/qa/suites/fs/multiclient/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/multiclient/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/clusters/.qa b/qa/suites/fs/multiclient/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml b/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml new file mode 120000 index 000000000..9f4f161a3 --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-2-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml b/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml new file mode 120000 index 000000000..6b25e07c4 --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-3-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/conf b/qa/suites/fs/multiclient/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/multiclient/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/distros/.qa b/qa/suites/fs/multiclient/distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/distros/ubuntu_latest.yaml b/qa/suites/fs/multiclient/distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/fs/multiclient/distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/mount/.qa b/qa/suites/fs/multiclient/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/mount/fuse.yaml b/qa/suites/fs/multiclient/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/multiclient/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/mount/kclient.yaml.disabled b/qa/suites/fs/multiclient/mount/kclient.yaml.disabled new file mode 100644 index 000000000..f00f16aea --- /dev/null +++ b/qa/suites/fs/multiclient/mount/kclient.yaml.disabled @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- kclient: diff --git a/qa/suites/fs/multiclient/objectstore-ec b/qa/suites/fs/multiclient/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/multiclient/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/overrides/+ b/qa/suites/fs/multiclient/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/+ diff --git a/qa/suites/fs/multiclient/overrides/.qa b/qa/suites/fs/multiclient/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/overrides/ignorelist_health.yaml b/qa/suites/fs/multiclient/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/multiclient/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/tasks/.qa b/qa/suites/fs/multiclient/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml new file mode 100644 index 000000000..e6d6ef99b --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml @@ -0,0 +1,14 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_misc + +overrides: + ceph: + log-ignorelist: + - evicting unresponsive client + - POOL_APP_NOT_ENABLED + - has not responded to cap revoke by MDS for over + - MDS_CLIENT_LATE_RELEASE + - responding to mclientcaps + - RECENT_CRASH diff --git a/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled b/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled new file mode 100644 index 000000000..888de867f --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled @@ -0,0 +1,17 @@ +# make sure we get the same MPI version on all hosts +tasks: +- pexec: + clients: + - cd $TESTDIR + - wget http://download.ceph.com/qa/fsx-mpi.c + - mpicc fsx-mpi.c -o fsx-mpi + - rm fsx-mpi.c + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: sudo $TESTDIR/fsx-mpi -o 1MB -N 50000 -p 10000 -l 1048576 $TESTDIR/gmnt/test + workdir: $TESTDIR/gmnt +- pexec: + clients: + - rm $TESTDIR/gmnt + - rm $TESTDIR/fsx-mpi diff --git a/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml b/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml new file mode 100644 index 000000000..8293595e2 --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml @@ -0,0 +1,31 @@ +# make sure we get the same MPI version on all hosts +tasks: +- pexec: + clients: + - set -x + - cd $TESTDIR + # partially or incorrectly installed mpich will create a mess and the + # configure script or the build process (which is initiated using "make" + # command) for the ior project will fail + - sudo apt purge -y mpich + - sudo apt install -y mpich + - wget http://download.ceph.com/qa/ior-3.3.0.tar.bz2 + - tar xvfj ior-3.3.0.tar.bz2 + - cd ior-3.3.0 + - ./configure + - make + - make install DESTDIR=$TESTDIR/binary/ + - cd $TESTDIR/ + - sudo apt install -y tree + - tree binary/ + - rm ior-3.3.0.tar.bz2 + - rm -r ior-3.3.0 + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: $TESTDIR/binary/usr/local/bin/ior -e -w -r -W -b 10m -a POSIX -o $TESTDIR/gmnt/ior.testfile +- pexec: + clients: + - rm -f $TESTDIR/gmnt/ior.testfile + - rm -f $TESTDIR/gmnt + - rm -rf $TESTDIR/binary diff --git a/qa/suites/fs/multiclient/tasks/mdtest.yaml b/qa/suites/fs/multiclient/tasks/mdtest.yaml new file mode 100644 index 000000000..32720e488 --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/mdtest.yaml @@ -0,0 +1,34 @@ +# make sure we get the same MPI version on all hosts +tasks: +- pexec: + clients: + - set -x + - cd $TESTDIR + - sudo apt purge -y mpich + - sudo apt install -y mpich + # use ior project instead of mdtest project because latter has been + # merged into former. See: + # https://github.com/MDTEST-LANL/mdtest/blob/master/README.md + - wget http://download.ceph.com/qa/ior-3.3.0.tar.bz2 + - tar xvfj ior-3.3.0.tar.bz2 + - cd ior-3.3.0 + # this option was set originall when mdtest binary was built using + # mdtest PR and not through ior project. 
+ #- MPI_CC=mpicc make + - ./configure + - make + - make install DESTDIR=$TESTDIR/binary/ + - cd $TESTDIR/ + - sudo apt install -y tree + - tree binary/ + - rm ior-3.3.0.tar.bz2 + - rm -r ior-3.3.0 + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: $TESTDIR/binary/usr/local/bin/mdtest -d $TESTDIR/gmnt -I 20 -z 5 -b 2 -R +- pexec: + clients: + - rm -f $TESTDIR/gmnt/ior.testfile + - rm -f $TESTDIR/gmnt + - rm -rf $TESTDIR/binary diff --git a/qa/suites/fs/multifs/% b/qa/suites/fs/multifs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multifs/% diff --git a/qa/suites/fs/multifs/.qa b/qa/suites/fs/multifs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/.qa @@ -0,0 +1 @@ +../.qa/
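In both the ior-shared-file and mdtest fragments above, each client symlinks its own mount (mnt.*) to a common $TESTDIR/gmnt path so that every MPI rank addresses the shared test file through the same pathname. A small sketch of that setup step, assuming a teuthology-style $TESTDIR with exactly one mnt.* mount per host:

    import glob
    import os

    testdir = os.environ["TESTDIR"]
    # assume each host has exactly one mnt.* mount; expose it under a common name
    mount = glob.glob(os.path.join(testdir, "mnt.*"))[0]
    os.symlink(mount, os.path.join(testdir, "gmnt"))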
\ No newline at end of file diff --git a/qa/suites/fs/multifs/begin b/qa/suites/fs/multifs/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/multifs/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/clusters/.qa b/qa/suites/fs/multifs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/clusters/1a3s-mds-2c-client.yaml b/qa/suites/fs/multifs/clusters/1a3s-mds-2c-client.yaml new file mode 120000 index 000000000..c190ea92f --- /dev/null +++ b/qa/suites/fs/multifs/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-2c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multifs/conf b/qa/suites/fs/multifs/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/multifs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/multifs/distro b/qa/suites/fs/multifs/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/multifs/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/multifs/mount b/qa/suites/fs/multifs/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/multifs/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/objectstore-ec b/qa/suites/fs/multifs/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/multifs/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/+ b/qa/suites/fs/multifs/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multifs/overrides/+ diff --git a/qa/suites/fs/multifs/overrides/.qa b/qa/suites/fs/multifs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/ignorelist_health.yaml b/qa/suites/fs/multifs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/multifs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/mon-debug.yaml b/qa/suites/fs/multifs/overrides/mon-debug.yaml new file mode 100644 index 000000000..24b454c00 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/mon-debug.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mon: + debug mon: 20 diff --git a/qa/suites/fs/multifs/tasks/.qa b/qa/suites/fs/multifs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/tasks/failover.yaml b/qa/suites/fs/multifs/tasks/failover.yaml new file mode 100644 index 000000000..9c403c76d --- /dev/null +++ b/qa/suites/fs/multifs/tasks/failover.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds + - \(MDS_INSUFFICIENT_STANDBY\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(MDS_DAMAGE\) + - \(FS_DEGRADED\) + ceph-fuse: + disabled: true +tasks: + - exec: + mon.a: + - ceph config set mgr mgr/crash/warn_recent_interval 0 + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_failover + diff --git a/qa/suites/fs/multifs/tasks/multifs-auth.yaml b/qa/suites/fs/multifs/tasks/multifs-auth.yaml new file mode 100644 index 000000000..ed1bdb475 --- /dev/null +++ b/qa/suites/fs/multifs/tasks/multifs-auth.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_multifs_auth diff --git a/qa/suites/fs/nfs/% b/qa/suites/fs/nfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/nfs/% diff --git a/qa/suites/fs/nfs/.qa b/qa/suites/fs/nfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/cluster/+ b/qa/suites/fs/nfs/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/nfs/cluster/+ diff --git a/qa/suites/fs/nfs/cluster/.qa b/qa/suites/fs/nfs/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/cluster/1-node.yaml b/qa/suites/fs/nfs/cluster/1-node.yaml new file mode 100644 index 000000000..8eeec7d2d --- /dev/null +++ b/qa/suites/fs/nfs/cluster/1-node.yaml @@ -0,0 +1,16 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 2 mds, 1 client +roles: +- - host.a + - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch apply mds a diff --git a/qa/suites/fs/nfs/overrides/.qa b/qa/suites/fs/nfs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/overrides/ignorelist_health.yaml b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml new file mode 100644 index 000000000..8bfe4dc6f --- /dev/null +++ b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - \(OSD_DOWN\) diff --git a/qa/suites/fs/nfs/supported-random-distros$ b/qa/suites/fs/nfs/supported-random-distros$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/nfs/supported-random-distros$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/nfs/tasks/.qa b/qa/suites/fs/nfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/tasks/nfs.yaml b/qa/suites/fs/nfs/tasks/nfs.yaml new file mode 100644 index 000000000..aa966bff2 --- /dev/null +++ b/qa/suites/fs/nfs/tasks/nfs.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_nfs diff --git a/qa/suites/fs/permission/% b/qa/suites/fs/permission/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/permission/% diff --git a/qa/suites/fs/permission/.qa b/qa/suites/fs/permission/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/begin b/qa/suites/fs/permission/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/permission/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/permission/clusters/.qa b/qa/suites/fs/permission/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/clusters/fixed-2-ucephfs.yaml b/qa/suites/fs/permission/clusters/fixed-2-ucephfs.yaml new file mode 120000 index 000000000..b0c41a89a --- /dev/null +++ b/qa/suites/fs/permission/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/fixed-2-ucephfs.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/conf b/qa/suites/fs/permission/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/permission/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/permission/distro b/qa/suites/fs/permission/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/permission/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/permission/mount/.qa b/qa/suites/fs/permission/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/mount/fuse.yaml b/qa/suites/fs/permission/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/permission/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/objectstore-ec b/qa/suites/fs/permission/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/permission/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/permission/overrides/+ b/qa/suites/fs/permission/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/permission/overrides/+ diff --git a/qa/suites/fs/permission/overrides/.qa b/qa/suites/fs/permission/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/overrides/ignorelist_health.yaml b/qa/suites/fs/permission/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/permission/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/permission/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/permission/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/tasks/.qa b/qa/suites/fs/permission/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/tasks/cfuse_workunit_misc.yaml b/qa/suites/fs/permission/tasks/cfuse_workunit_misc.yaml new file mode 100644 index 000000000..ca026c45f --- /dev/null +++ b/qa/suites/fs/permission/tasks/cfuse_workunit_misc.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + client acl type: posix_acl +tasks: +- workunit: + clients: + all: + - fs/misc/acl.sh + - fs/misc/chmod.sh + - fs/misc/dac_override.sh diff --git a/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..a81a3b46e --- /dev/null +++ b/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true + client acl type: posix_acl +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/shell/% b/qa/suites/fs/shell/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/shell/% diff --git a/qa/suites/fs/shell/.qa b/qa/suites/fs/shell/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/begin b/qa/suites/fs/shell/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/shell/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/shell/clusters/.qa b/qa/suites/fs/shell/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/clusters/1-mds-1-client-coloc.yaml b/qa/suites/fs/shell/clusters/1-mds-1-client-coloc.yaml new file mode 120000 index 000000000..d15ecfda0 --- /dev/null +++ b/qa/suites/fs/shell/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client-coloc.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/conf b/qa/suites/fs/shell/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/shell/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/shell/distro b/qa/suites/fs/shell/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/shell/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/shell/mount/.qa b/qa/suites/fs/shell/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/mount/fuse.yaml b/qa/suites/fs/shell/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/shell/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/objectstore/.qa b/qa/suites/fs/shell/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/shell/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/shell/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/+ b/qa/suites/fs/shell/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/shell/overrides/+ diff --git a/qa/suites/fs/shell/overrides/.qa b/qa/suites/fs/shell/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/ignorelist_health.yaml b/qa/suites/fs/shell/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/shell/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/shell/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/shell/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/no_client_pidfile.yaml b/qa/suites/fs/shell/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/shell/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/tasks/.qa b/qa/suites/fs/shell/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/tasks/cephfs-shell.yaml b/qa/suites/fs/shell/tasks/cephfs-shell.yaml new file mode 100644 index 000000000..9708252e9 --- /dev/null +++ b/qa/suites/fs/shell/tasks/cephfs-shell.yaml @@ -0,0 +1,8 @@ +# Right now, cephfs-shell is only available as a package on Ubuntu. +# This overrides the random distribution that's chosen in the other yaml fragments. +os_type: ubuntu +os_version: "20.04" +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_cephfs_shell diff --git a/qa/suites/fs/snaps/% b/qa/suites/fs/snaps/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/snaps/% diff --git a/qa/suites/fs/snaps/.qa b/qa/suites/fs/snaps/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/begin b/qa/suites/fs/snaps/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/snaps/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/clusters/.qa b/qa/suites/fs/snaps/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/clusters/1a3s-mds-1c-client.yaml b/qa/suites/fs/snaps/clusters/1a3s-mds-1c-client.yaml new file mode 120000 index 000000000..4ab7357dc --- /dev/null +++ b/qa/suites/fs/snaps/clusters/1a3s-mds-1c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-1c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/snaps/conf b/qa/suites/fs/snaps/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/snaps/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/snaps/distro b/qa/suites/fs/snaps/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/snaps/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/snaps/mount b/qa/suites/fs/snaps/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/snaps/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/objectstore-ec b/qa/suites/fs/snaps/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/snaps/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/snaps/overrides/+ b/qa/suites/fs/snaps/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/snaps/overrides/+ diff --git a/qa/suites/fs/snaps/overrides/.qa b/qa/suites/fs/snaps/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/overrides/ignorelist_health.yaml b/qa/suites/fs/snaps/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/snaps/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/snaps/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/snaps/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/snaps/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/snaps/tasks/.qa b/qa/suites/fs/snaps/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/tasks/workunit/.qa b/qa/suites/fs/snaps/tasks/workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/tasks/workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/tasks/workunit/snaps.yaml b/qa/suites/fs/snaps/tasks/workunit/snaps.yaml new file mode 100644 index 000000000..dd5a0abd4 --- /dev/null +++ b/qa/suites/fs/snaps/tasks/workunit/snaps.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - fs/snaps diff --git a/qa/suites/fs/thrash/.qa b/qa/suites/fs/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/% b/qa/suites/fs/thrash/multifs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/% diff --git a/qa/suites/fs/thrash/multifs/.qa b/qa/suites/fs/thrash/multifs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/begin b/qa/suites/fs/thrash/multifs/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/thrash/multifs/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/clusters/.qa b/qa/suites/fs/thrash/multifs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml b/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml new file mode 120000 index 000000000..c190ea92f --- /dev/null +++ b/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-2c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/conf b/qa/suites/fs/thrash/multifs/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/thrash/multifs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/distro b/qa/suites/fs/thrash/multifs/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/mount b/qa/suites/fs/thrash/multifs/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/msgr-failures/.qa b/qa/suites/fs/thrash/multifs/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml b/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml diff --git a/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml b/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml new file mode 100644 index 000000000..17cbc5b90 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + ms inject socket failures: 2500 + ms inject delay type: client mds + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + mds: + ms inject socket failures: 2500 + ms inject delay type: client mds osd + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/fs/thrash/multifs/objectstore/.qa b/qa/suites/fs/thrash/multifs/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/+ b/qa/suites/fs/thrash/multifs/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/+ diff --git a/qa/suites/fs/thrash/multifs/overrides/.qa b/qa/suites/fs/thrash/multifs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/client-shutdown.yaml b/qa/suites/fs/thrash/multifs/overrides/client-shutdown.yaml new file mode 100644 index 000000000..30b2ea981 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/client-shutdown.yaml @@ -0,0 +1,6 @@ +# Lengthen the timeout for thrashed MDS +overrides: + ceph: + conf: + client: + client_shutdown_timeout: 120 diff --git a/qa/suites/fs/thrash/multifs/overrides/frag.yaml b/qa/suites/fs/thrash/multifs/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/ignorelist_health.yaml b/qa/suites/fs/thrash/multifs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/thrash/multifs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/multifs.yaml b/qa/suites/fs/thrash/multifs/overrides/multifs.yaml new file mode 100644 index 000000000..faf7838c2 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/multifs.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + cephfs: + fs: + - name: a + - name: b + ceph-fuse: + client.0: + cephfs_name: a + client.1: + cephfs_name: b + kclient: + client.0: + cephfs_name: a + client.1: + cephfs_name: b diff --git a/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml b/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml b/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/% b/qa/suites/fs/thrash/multifs/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/% diff --git a/qa/suites/fs/thrash/multifs/tasks/.qa b/qa/suites/fs/thrash/multifs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa b/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml new file mode 100644 index 000000000..33748cea5 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml @@ -0,0 +1,7 @@ +tasks: +- mds_thrash: + +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml new file mode 100644 index 000000000..fbbe16151 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) +tasks: +- mon_thrash: + check_mds_failover: True + revive_delay: 20 + thrash_delay: 10 diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa b/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml new file mode 100644 index 000000000..dd5a0abd4 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - fs/snaps diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..f7784383b --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml new file mode 120000 index 000000000..a1df03277 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml new file mode 100644 index 000000000..7e4f711a2 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml new file mode 100644 index 000000000..9270f3c51 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/fs/thrash/workloads/% b/qa/suites/fs/thrash/workloads/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/% diff --git a/qa/suites/fs/thrash/workloads/.qa b/qa/suites/fs/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/begin b/qa/suites/fs/thrash/workloads/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/clusters/.qa b/qa/suites/fs/thrash/workloads/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/clusters/1a5s-mds-1c-client.yaml b/qa/suites/fs/thrash/workloads/clusters/1a5s-mds-1c-client.yaml new file mode 120000 index 000000000..2ab33af1c --- /dev/null +++ b/qa/suites/fs/thrash/workloads/clusters/1a5s-mds-1c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a5s-mds-1c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/conf b/qa/suites/fs/thrash/workloads/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/thrash/workloads/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/distro b/qa/suites/fs/thrash/workloads/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/mount b/qa/suites/fs/thrash/workloads/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/.qa b/qa/suites/fs/thrash/workloads/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml b/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml b/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml new file mode 100644 index 000000000..17cbc5b90 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + ms inject socket failures: 2500 + ms inject delay type: client mds + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + mds: + ms inject socket failures: 2500 + ms inject delay type: client mds osd + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/fs/thrash/workloads/objectstore-ec b/qa/suites/fs/thrash/workloads/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/% b/qa/suites/fs/thrash/workloads/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/% diff --git a/qa/suites/fs/thrash/workloads/overrides/.qa b/qa/suites/fs/thrash/workloads/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/client-shutdown.yaml b/qa/suites/fs/thrash/workloads/overrides/client-shutdown.yaml new file mode 100644 index 000000000..30b2ea981 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/client-shutdown.yaml @@ -0,0 +1,6 @@ +# Lengthen the timeout for thrashed MDS +overrides: + ceph: + conf: + client: + client_shutdown_timeout: 120 diff --git a/qa/suites/fs/thrash/workloads/overrides/frag.yaml b/qa/suites/fs/thrash/workloads/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/ignorelist_health.yaml b/qa/suites/fs/thrash/workloads/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/thrash/workloads/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml new file mode 100644 index 000000000..91b453679 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds oft prefetch dirfrags: false diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml new file mode 100644 index 000000000..bd202f988 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds oft prefetch dirfrags: true diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_entire_dirfrags b/qa/suites/fs/thrash/workloads/overrides/prefetch_entire_dirfrags new file mode 120000 index 000000000..9b8024fba --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_entire_dirfrags @@ -0,0 +1 @@ +.qa/cephfs/overrides/prefetch_entire_dirfrags
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/races.yaml b/qa/suites/fs/thrash/workloads/overrides/races.yaml new file mode 100644 index 000000000..e7d753896 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/races.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds_sleep_rank_change: 5000000.0 diff --git a/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml b/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml b/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/ranks/.qa b/qa/suites/fs/thrash/workloads/ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/ranks/1.yaml b/qa/suites/fs/thrash/workloads/ranks/1.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/1.yaml diff --git a/qa/suites/fs/thrash/workloads/ranks/3.yaml b/qa/suites/fs/thrash/workloads/ranks/3.yaml new file mode 100644 index 000000000..9ed043c14 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/3.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 3 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/thrash/workloads/ranks/5.yaml b/qa/suites/fs/thrash/workloads/ranks/5.yaml new file mode 100644 index 000000000..ed89cef3a --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/5.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 5 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/thrash/workloads/tasks/% b/qa/suites/fs/thrash/workloads/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/% diff --git a/qa/suites/fs/thrash/workloads/tasks/.qa b/qa/suites/fs/thrash/workloads/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa b/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml new file mode 100644 index 000000000..33748cea5 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml @@ -0,0 +1,7 @@ +tasks: +- mds_thrash: + +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml new file mode 100644 index 000000000..fbbe16151 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) +tasks: +- mon_thrash: + check_mds_failover: True + revive_delay: 20 + thrash_delay: 10 diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml new file mode 100644 index 000000000..037d399a7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - MDS_SLOW_METADATA_IO + - MDS_TRIM +tasks: +- thrashosds: diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa b/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/.qa b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/snaps.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/snaps.yaml new file mode 100644 index 000000000..dd5a0abd4 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/snaps.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - fs/snaps diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/trivial_sync.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/trivial_sync.yaml new file mode 120000 index 000000000..a1df03277 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/trivial_sync.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/.qa b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/ffsb.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/ffsb.yaml new file mode 100644 index 000000000..7e4f711a2 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/ffsb.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/fsstress.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/fsstress.yaml new file mode 100644 index 000000000..bae220292 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/iozone.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/iozone.yaml new file mode 100644 index 000000000..9270f3c51 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/iozone.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/pjd.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/pjd.yaml new file mode 100644 index 000000000..f7784383b --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/top/% b/qa/suites/fs/top/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/top/% diff --git a/qa/suites/fs/top/.qa b/qa/suites/fs/top/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/begin b/qa/suites/fs/top/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/top/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/top/cluster/+ b/qa/suites/fs/top/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/top/cluster/+ diff --git a/qa/suites/fs/top/cluster/.qa b/qa/suites/fs/top/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/cluster/1-node.yaml b/qa/suites/fs/top/cluster/1-node.yaml new file mode 100644 index 000000000..48c4996e7 --- /dev/null +++ b/qa/suites/fs/top/cluster/1-node.yaml @@ -0,0 +1,12 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 2 mds, 2 clients +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 diff --git a/qa/suites/fs/top/mount/.qa b/qa/suites/fs/top/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/mount/fuse.yaml b/qa/suites/fs/top/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/top/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/top/objectstore/.qa b/qa/suites/fs/top/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/top/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/top/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/top/overrides/.qa b/qa/suites/fs/top/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/overrides/ignorelist_health.yaml b/qa/suites/fs/top/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/top/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/top/supported-random-distros$ b/qa/suites/fs/top/supported-random-distros$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/top/supported-random-distros$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/top/tasks/.qa b/qa/suites/fs/top/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/tasks/fstop.yaml b/qa/suites/fs/top/tasks/fstop.yaml new file mode 100644 index 000000000..406f6804f --- /dev/null +++ b/qa/suites/fs/top/tasks/fstop.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_fstop diff --git a/qa/suites/fs/traceless/% b/qa/suites/fs/traceless/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/traceless/% diff --git a/qa/suites/fs/traceless/.qa b/qa/suites/fs/traceless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/begin b/qa/suites/fs/traceless/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/traceless/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/clusters/.qa b/qa/suites/fs/traceless/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/clusters/fixed-2-ucephfs.yaml b/qa/suites/fs/traceless/clusters/fixed-2-ucephfs.yaml new file mode 120000 index 000000000..b0c41a89a --- /dev/null +++ b/qa/suites/fs/traceless/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/fixed-2-ucephfs.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/conf b/qa/suites/fs/traceless/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/traceless/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/traceless/distro b/qa/suites/fs/traceless/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/traceless/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/traceless/mount/.qa b/qa/suites/fs/traceless/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/mount/fuse.yaml b/qa/suites/fs/traceless/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/traceless/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/objectstore-ec b/qa/suites/fs/traceless/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/traceless/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/+ b/qa/suites/fs/traceless/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/traceless/overrides/+ diff --git a/qa/suites/fs/traceless/overrides/.qa b/qa/suites/fs/traceless/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/frag.yaml b/qa/suites/fs/traceless/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/ignorelist_health.yaml b/qa/suites/fs/traceless/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/traceless/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/.qa b/qa/suites/fs/traceless/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml new file mode 120000 index 000000000..8702f4f3d --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml new file mode 120000 index 000000000..b0f876c3c --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 120000 index 000000000..01e889b23 --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/traceless/.qa b/qa/suites/fs/traceless/traceless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/traceless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/traceless/50pc.yaml b/qa/suites/fs/traceless/traceless/50pc.yaml new file mode 100644 index 000000000..e0418bcb2 --- /dev/null +++ b/qa/suites/fs/traceless/traceless/50pc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds inject traceless reply probability: .5 diff --git a/qa/suites/fs/upgrade/.qa b/qa/suites/fs/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/.qa b/qa/suites/fs/upgrade/featureful_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/% b/qa/suites/fs/upgrade/featureful_client/old_client/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/% diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/bluestore-bitmap.yaml new file mode 120000 index 000000000..17ad98e79 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/centos_8.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/clusters/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/clusters/1-mds-2-client-micro.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/1-mds-2-client-micro.yaml new file mode 120000 index 000000000..feb68f343 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/1-mds-2-client-micro.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-2-client-micro.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/conf b/qa/suites/fs/upgrade/featureful_client/old_client/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/% b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/% diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/no.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/no.yaml new file mode 100644 index 000000000..f9e95daa9 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/no.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 1 diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/yes.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/yes.yaml new file mode 100644 index 000000000..b3a9b5d67 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/yes.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 2 diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/% b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/% diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/.qa @@ -0,0 +1 @@ +../.qa/
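The overrides facets above (multimds/no.yaml, multimds/yes.yaml and pg-warn.yaml) are combined by the '%' matrix into a single effective set of overrides for each generated job. The following is a minimal Python sketch of how such facet fragments could be deep-merged, with the fragments written out as plain dicts; it is only an illustration of the idea, not teuthology's actual merge code.

    # Hedged sketch: deep-merge two of the facet fragments shown above
    # (multimds/yes.yaml and pg-warn.yaml) into one overrides mapping.
    # Only an illustration of the idea, not teuthology's real merge code.
    from pprint import pprint

    def deep_merge(base, update):
        # Recursively merge 'update' into 'base'; later values win.
        # (For brevity this mutates its inputs.)
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(base.get(key), dict):
                deep_merge(base[key], value)
            else:
                base[key] = value
        return base

    multimds_yes = {"overrides": {"ceph": {"cephfs": {"max_mds": 2}}}}
    pg_warn = {"overrides": {"ceph": {"conf": {"global": {"mon pg warn min per osd": 0}}}}}

    merged = deep_merge(deep_merge({}, multimds_yes), pg_warn)
    pprint(merged)
    # overrides.ceph now carries both the cephfs.max_mds and conf.global settings.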
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml new file mode 100644 index 000000000..e7774423f --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml @@ -0,0 +1,39 @@ +meta: +- desc: | + install ceph/octopus latest +tasks: +- install: + branch: octopus + exclude_packages: + - librados3 + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - cephadm + - ceph-volume + extra_packages: ['librados2'] +- print: "**** done installing octopus" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client octopus +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/1-client.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/1-client.yaml new file mode 100644 index 000000000..976d6e265 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/1-client.yaml @@ -0,0 +1,8 @@ +tasks: +- ceph-fuse: +- print: "**** done octopus client" +- workunit: + clients: + all: + - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml new file mode 100644 index 000000000..26c185946 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml @@ -0,0 +1,48 @@ +overrides: + ceph: + log-ignorelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) + - slow request + conf: + global: + bluestore warn on legacy statfs: false + bluestore warn on no per pool omap: false + mon: + mon warn on osd down out interval zero: false + +tasks: +- mds_pre_upgrade: +- print: "**** done mds pre-upgrade sequence" +- install.upgrade: + # upgrade the single cluster node, which is running all the mon/mds/osd/mgr daemons + mon.a: + branch: quincy +- print: "**** done install.upgrade the host" +- ceph.restart: + daemons: [mon.*, mgr.*] + mon-health-to-clog: false + wait-for-healthy: false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.stop: [mds.*] +- ceph.restart: + daemons: [mds.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph osd dump -f json-pretty + - ceph versions + - ceph osd require-osd-release quincy + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: +- print: "**** done ceph.restart" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/.qa @@ -0,0 +1 @@ +../.qa/
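In 2-upgrade.yaml above, daemons are restarted in a fixed order (mon/mgr, then osd, then mds) and the upgrade is finalized by pinning the minimum OSD release and disabling the PG autoscaler on every pool. A rough Python equivalent of that finalization step is sketched below; it shells out to the same ceph commands the fragment runs, assumes it executes on a node with an admin keyring, and is not the code teuthology itself uses.

    # Hedged sketch of the finalization performed at the end of 2-upgrade.yaml:
    # pin the minimum OSD release and disable the PG autoscaler on every pool.
    # Assumes it runs on a node with an admin keyring; not teuthology code.
    import subprocess

    def ceph(*args):
        # Run a ceph CLI command and return its stdout as text.
        return subprocess.run(("ceph",) + args, check=True,
                              capture_output=True, text=True).stdout

    print(ceph("osd", "dump", "-f", "json-pretty"))
    print(ceph("versions"))

    # Same effect as 'ceph osd require-osd-release quincy' in the fragment.
    ceph("osd", "require-osd-release", "quincy")

    # Equivalent of: for f in `ceph osd pool ls`; do ceph osd pool set $f pg_autoscale_mode off; done
    for pool in ceph("osd", "pool", "ls").splitlines():
        if pool:
            ceph("osd", "pool", "set", pool, "pg_autoscale_mode", "off")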
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/no.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/no.yaml new file mode 100644 index 000000000..b495eb41b --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/no.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + all: + - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/quincy.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/quincy.yaml new file mode 100644 index 000000000..138d8f4e2 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/quincy.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - missing required features +tasks: +- exec: + mon.a: + - ceph fs dump --format=json-pretty + - ceph fs required_client_features cephfs add metric_collect +- sleep: + duration: 5 +- fs.clients_evicted: diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/% b/qa/suites/fs/upgrade/featureful_client/upgraded_client/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/% diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/bluestore-bitmap.yaml new file mode 120000 index 000000000..17ad98e79 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/centos_8.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/1-mds-2-client-micro.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/1-mds-2-client-micro.yaml new file mode 120000 index 000000000..feb68f343 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/1-mds-2-client-micro.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-2-client-micro.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/conf b/qa/suites/fs/upgrade/featureful_client/upgraded_client/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/% b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/% diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/no.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/no.yaml new file mode 100644 index 000000000..f9e95daa9 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/no.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 1 diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/yes.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/yes.yaml new file mode 100644 index 000000000..b3a9b5d67 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/yes.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 2 diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/% b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/% diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml new file mode 100644 index 000000000..e7774423f --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml @@ -0,0 +1,39 @@ +meta: +- desc: | + install ceph/octopus latest +tasks: +- install: + branch: octopus + exclude_packages: + - librados3 + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - cephadm + - ceph-volume + extra_packages: ['librados2'] +- print: "**** done installing octopus" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client octopus +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/1-client.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/1-client.yaml new file mode 100644 index 000000000..c9b4c046f --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/1-client.yaml @@ -0,0 +1,11 @@ +nuke-on-error: false +overrides: + nuke-on-error: false +tasks: +- ceph-fuse: +- print: "**** done octopus client" +#- workunit: +# clients: +# all: +# - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml new file mode 100644 index 000000000..e5ea8b19c --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml @@ -0,0 +1,48 @@ +overrides: + ceph: + log-ignorelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) + - slow request + conf: + global: + bluestore warn on legacy statfs: false + bluestore warn on no per pool omap: false + mon: + mon warn on osd down out interval zero: false + +tasks: +- mds_pre_upgrade: +- print: "**** done mds pre-upgrade sequence" +- install.upgrade: + # upgrade the single cluster node, which is running all the mon/mds/osd/mgr daemons + mon.a: + branch: quincy +- print: "**** done install.upgrade the host" +- ceph.restart: + daemons: [mon.*, mgr.*] + mon-health-to-clog: false + wait-for-healthy: false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.stop: [mds.*] +- ceph.restart: + daemons: [mds.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph versions + - ceph osd dump -f json-pretty + - ceph osd require-osd-release quincy + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: +- print: "**** done ceph.restart" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/3-client-upgrade.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/3-client-upgrade.yaml new file mode 100644 index 000000000..251c349ac --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/3-client-upgrade.yaml @@ -0,0 +1,15 @@ +tasks: +- install.upgrade: + client.0: + branch: quincy +- print: "**** done 
install.upgrade on client.0" +- ceph-fuse: + client.0: + mounted: false + client.1: + skip: true +- ceph-fuse: + client.0: + client.1: + skip: true +- print: "**** done remount client" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/4-compat_client.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/4-compat_client.yaml new file mode 100644 index 000000000..d8f260007 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/4-compat_client.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - missing required features +tasks: +- exec: + mon.a: + - ceph fs dump --format=json-pretty + - ceph fs required_client_features cephfs add metric_collect +- sleep: + duration: 5 +# client.0 is upgraded and client.1 is evicted by the MDS due to missing +# feature compat set +- fs.clients_evicted: + clients: + client.0: False + client.1: True diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/5-client-sanity.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/5-client-sanity.yaml new file mode 100644 index 000000000..e206457e6 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/5-client-sanity.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + client.0: + - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml new file mode 120000 index 000000000..fb603bc9a --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.stream_container_tools.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/conf b/qa/suites/fs/upgrade/mds_upgrade_sequence/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/no.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/no.yaml new file mode 100644 index 000000000..868415bcb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/no.yaml @@ -0,0 +1,3 @@ +teuthology: + variables: + fail_fs: false diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/yes.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/yes.yaml new file mode 100644 index 000000000..411ff3814 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/yes.yaml @@ -0,0 +1,3 @@ +teuthology: + variables: + fail_fs: true diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/syntax.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/syntax.yaml new file mode 100644 index 000000000..84d5d43b2 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/syntax.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + syntax: 'v1' diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml new file mode 100644 index 000000000..bce4ecd34 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml @@ -0,0 +1,11 @@ +roles: +- - host.a + - client.0 + - osd.0 + - osd.1 + - osd.2 +- - host.b + - client.1 + - osd.3 + - osd.4 + - osd.5 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/pacific.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/pacific.yaml new file mode 100644 index 000000000..6432d7080 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/pacific.yaml @@ -0,0 +1,32 @@ +meta: +- desc: | + setup ceph/pacific + +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:pacific + roleless: true + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing pacific cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/v16.2.4.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/v16.2.4.yaml new file mode 100644 index 000000000..36bfb1b91 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/v16.2.4.yaml @@ -0,0 +1,34 @@ +teuthology: + postmerge: + - if yaml.teuthology.variables.fail_fs then reject() end + +meta: +- desc: | + setup ceph/pacific v16.2.4 + +tasks: +# Disable metrics sending by kclient as it may crash (assert) a v16.2.4 MDS +- pexec: + clients: + - sudo modprobe -r ceph + - sudo modprobe ceph disable_send_metrics=on +- install: + tag: v16.2.4 + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.4 + cephadm_branch: v16.2.4 + cephadm_git_url: https://github.com/ceph/ceph + # needed for v16.2.4 due to --skip-admin-label + avoid_pacific_features: true +- print: "**** done starting v16.2.4" +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml new file mode 100644 index 000000000..5ee0022c6 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml @@ -0,0 +1,5 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs volume create cephfs --placement=4 + - ceph fs dump diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml new file mode 100644 index 000000000..8c1cd2fe0 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs max_mds 1 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml new file mode 100644 index 000000000..fcd3b1ea4 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs max_mds 2 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml new file mode 100644 index 000000000..3dbc81089 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs allow_standby_replay false diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml new file mode 100644 index 000000000..fb894425e --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs allow_standby_replay true diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/.qa @@ -0,0 +1 @@ +../.qa/
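The facets above set max_mds and allow_standby_replay on the newly created file system. A hedged Python sketch for reading those settings back is shown below; max_mds is a well-known mdsmap field, while how allow_standby_replay shows up in the JSON may vary by release, so the sketch simply dumps the whole map as well.

    # Hedged sketch: read back the settings toggled by the facets above.
    # 'max_mds' is a well-known mdsmap field; other keys may differ by release.
    import json
    import subprocess

    def fs_get(name):
        out = subprocess.run(["ceph", "fs", "get", name, "--format=json"],
                             check=True, capture_output=True, text=True).stdout
        return json.loads(out)

    mdsmap = fs_get("cephfs").get("mdsmap", {})
    print("max_mds:", mdsmap.get("max_mds"))
    print(json.dumps(mdsmap, indent=2))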
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/no.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/no.yaml new file mode 100644 index 000000000..107f30ecd --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/no.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs inline_data false diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/yes.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/yes.yaml new file mode 100644 index 000000000..246ed71b4 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs inline_data true --yes-i-really-really-mean-it diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/4-verify.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/4-verify.yaml new file mode 100644 index 000000000..e71365ad1 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/4-verify.yaml @@ -0,0 +1,7 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs dump + - ceph --format=json fs dump | jq -e ".filesystems | length == 1" + - while ! ceph --format=json mds versions | jq -e ". | add == 4"; do sleep 1; done +- fs.pre_upgrade_save: diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml new file mode 100644 index 000000000..92b9dda84 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml @@ -0,0 +1,3 @@ +tasks: +- kclient: +- print: "**** done client" diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-mgr-staggered.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-mgr-staggered.yaml new file mode 100644 index 000000000..fac9e29db --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-mgr-staggered.yaml @@ -0,0 +1,18 @@ +teuthology: + premerge: | + if not yaml.teuthology.variables.fail_fs then reject() end +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mgr + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph versions | jq -e '.mgr | length == 1' + - ceph versions | jq -e '.mgr | keys' | grep $sha1 + - ceph versions | jq -e '.overall | length == 2' + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 2' + - ceph orch ps diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/+ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/+ diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/fail_fs.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/fail_fs.yaml new file mode 100644 index 000000000..fbde19a22 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/fail_fs.yaml @@ -0,0 +1,15 @@ +teuthology: + premerge: | + local set = yaml.teuthology.variables.fail_fs + local cmd = "ceph config set mgr mgr/orchestrator/fail_fs "..tostring(set) + local cmds = yaml_fragment['upgrade-tasks'].sequential[0]['cephadm.shell']['host.a'] + if set then + py_attrgetter(cmds).append "ceph config set mgr mgr/orchestrator/fail_fs true" + else + py_attrgetter(cmds).append "ceph config set mgr mgr/orchestrator/fail_fs false || true" + end +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: [] diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/5-upgrade-with-workload.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/5-upgrade-with-workload.yaml new file mode 100644 index 000000000..392b1e66d --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/5-upgrade-with-workload.yaml @@ -0,0 +1,35 @@ +tasks: +- parallel: + - upgrade-tasks + - workload-tasks + +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - cephadm.shell: + env: [sha1] + host.a: + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph fs dump; ceph orch upgrade status ; ceph health detail ; sleep 30 ; done + - ceph orch ps + - ceph orch upgrade status + - ceph health detail + - ceph versions + - echo "wait for servicemap items w/ changing names to refresh" + - sleep 60 + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + +workload-tasks: + sequential: + - workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/6-verify.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/6-verify.yaml new file mode 100644 index 000000000..c2b657e5a --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/6-verify.yaml @@ -0,0 +1,5 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs dump +- fs.post_upgrade_checks: diff --git a/qa/suites/fs/upgrade/nofs/% b/qa/suites/fs/upgrade/nofs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/% diff --git a/qa/suites/fs/upgrade/nofs/.qa b/qa/suites/fs/upgrade/nofs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/.qa @@ -0,0 +1 @@ +../.qa/
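The while loop in 5-upgrade-with-workload.yaml above polls 'ceph orch upgrade status' until the upgrade is no longer in progress or its message reports an error, then asserts that 'ceph versions' shows a single overall version. The Python sketch below mirrors that shell loop; it assumes the status command emits JSON with in_progress and message fields, as the jq filters in the fragment imply, and is an illustration rather than the task's implementation.

    # Hedged Python rendering of the shell polling loop above. Assumes
    # 'ceph orch upgrade status' emits JSON with 'in_progress' and 'message'
    # fields, as the jq filters in the fragment imply.
    import json
    import subprocess
    import time

    def ceph_json(*args):
        out = subprocess.run(("ceph",) + args, check=True,
                             capture_output=True, text=True).stdout
        return json.loads(out)

    while True:
        status = ceph_json("orch", "upgrade", "status")
        if not status.get("in_progress"):
            break
        if "Error" in (status.get("message") or ""):
            raise RuntimeError("upgrade failed: %s" % status["message"])
        subprocess.run(["ceph", "orch", "ps"])   # progress output, as in the loop
        subprocess.run(["ceph", "versions"])
        time.sleep(30)

    # Mirrors: ceph versions | jq -e '.overall | length == 1'
    overall = ceph_json("versions").get("overall", {})
    assert len(overall) == 1, "daemons still on mixed versions: %s" % overall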
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/README b/qa/suites/fs/upgrade/nofs/README new file mode 100644 index 000000000..e7f6960ef --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/README @@ -0,0 +1,3 @@ +This test just verifies that upgrades work with no file system present. In +particular, it checks that the MDSMonitor does not blow up with version +mismatches. diff --git a/qa/suites/fs/upgrade/nofs/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/nofs/bluestore-bitmap.yaml new file mode 120000 index 000000000..fb603bc9a --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/centos_8.yaml b/qa/suites/fs/upgrade/nofs/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/conf b/qa/suites/fs/upgrade/nofs/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/no-mds-cluster.yaml b/qa/suites/fs/upgrade/nofs/no-mds-cluster.yaml new file mode 100644 index 000000000..33c6fb16b --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/no-mds-cluster.yaml @@ -0,0 +1,6 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, osd.0, osd.1, osd.2, osd.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/fs/upgrade/nofs/overrides/% b/qa/suites/fs/upgrade/nofs/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/% diff --git a/qa/suites/fs/upgrade/nofs/overrides/.qa b/qa/suites/fs/upgrade/nofs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/nofs/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/nofs/tasks/% b/qa/suites/fs/upgrade/nofs/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/% diff --git a/qa/suites/fs/upgrade/nofs/tasks/.qa b/qa/suites/fs/upgrade/nofs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/tasks/0-pacific.yaml b/qa/suites/fs/upgrade/nofs/tasks/0-pacific.yaml new file mode 100644 index 000000000..b74accc69 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/0-pacific.yaml @@ -0,0 +1,39 @@ +meta: +- desc: | + install ceph/pacific latest +tasks: +- install: + branch: pacific + exclude_packages: + - librados3 + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - cephadm + - ceph-volume + extra_packages: ['librados2'] +- print: "**** done installing pacific" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client pacific +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/nofs/tasks/1-upgrade.yaml b/qa/suites/fs/upgrade/nofs/tasks/1-upgrade.yaml new file mode 100644 index 000000000..858142871 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/1-upgrade.yaml @@ -0,0 +1,44 @@ +overrides: + ceph: + log-ignorelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) + - slow request + conf: + global: + bluestore warn on legacy statfs: false + bluestore warn on no per pool omap: false + mon: + mon warn on osd down out interval zero: false + +tasks: +- print: "*** upgrading, no cephfs present" +- exec: + mon.a: + - ceph fs dump +- install.upgrade: + mon.a: +- print: "**** done install.upgrade" +- ceph.restart: + daemons: [mon.*, mgr.*] + mon-health-to-clog: false + wait-for-healthy: false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph versions + - ceph osd dump -f json-pretty + - ceph fs dump + - ceph osd require-osd-release quincy + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: +- print: "**** done ceph.restart" diff --git a/qa/suites/fs/upgrade/upgraded_client/% b/qa/suites/fs/upgrade/upgraded_client/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/% diff --git a/qa/suites/fs/upgrade/upgraded_client/.qa b/qa/suites/fs/upgrade/upgraded_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/.qa @@ -0,0 +1 @@ +../.qa/
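The nofs tasks above upgrade a cluster that deliberately has no CephFS file system, running 'ceph fs dump' before and after to confirm the MDSMonitor copes. A small hedged sketch of an equivalent sanity check is below; it assumes an admin node and uses 'ceph fs ls' rather than parsing the dump output.

    # Hedged sketch for the nofs scenario: assert the cluster reports zero
    # CephFS file systems. Assumes an admin node; uses 'ceph fs ls' instead
    # of parsing 'ceph fs dump'.
    import json
    import subprocess

    out = subprocess.run(["ceph", "fs", "ls", "--format=json"],
                         check=True, capture_output=True, text=True).stdout
    assert len(json.loads(out)) == 0, "expected no CephFS file systems here"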
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/upgraded_client/bluestore-bitmap.yaml new file mode 120000 index 000000000..675dce056 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/centos_8.yaml b/qa/suites/fs/upgrade/upgraded_client/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/clusters/% b/qa/suites/fs/upgrade/upgraded_client/clusters/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/clusters/% diff --git a/qa/suites/fs/upgrade/upgraded_client/clusters/.qa b/qa/suites/fs/upgrade/upgraded_client/clusters/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/clusters/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/clusters/1-mds-1-client-micro.yaml b/qa/suites/fs/upgrade/upgraded_client/clusters/1-mds-1-client-micro.yaml new file mode 120000 index 000000000..50ffb6d53 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/clusters/1-mds-1-client-micro.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client-micro.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/conf b/qa/suites/fs/upgrade/upgraded_client/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/% b/qa/suites/fs/upgrade/upgraded_client/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/% diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/.qa b/qa/suites/fs/upgrade/upgraded_client/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/upgraded_client/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/% b/qa/suites/fs/upgrade/upgraded_client/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/% diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/nautilus.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/nautilus.yaml new file mode 100644 index 000000000..02f541eaf --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/nautilus.yaml @@ -0,0 +1,53 @@ +meta: +- desc: | + install ceph/nautilus latest +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs + # For kernel_untar_build workunit + extra_system_packages: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - NetworkManager + - iproute + - util-linux +- print: "**** done installing nautilus" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client nautilus +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/pacific.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/pacific.yaml new file mode 100644 index 000000000..defb03922 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/pacific.yaml @@ -0,0 +1,53 @@ +meta: +- desc: | + install ceph/pacific latest +tasks: +- install: + branch: pacific + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs + # For kernel_untar_build workunit + extra_system_packages: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - NetworkManager + - iproute + - util-linux +- print: "**** done installing pacific" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client pacific +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/mount b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/% b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/% diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/fuse-upgrade.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/fuse-upgrade.yaml new file mode 100644 index 000000000..34c85a00e --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/fuse-upgrade.yaml @@ -0,0 +1,14 @@ +teuthology: + postmerge: + - if not is_fuse() then reject() end +tasks: +- ceph-fuse: + client.0: + mounted: false +- print: "**** done unmount client.0" +- install.upgrade: + client.0: +- print: "**** done install.upgrade on client.0" +- ceph-fuse: + client.0: +- print: "**** done remount client" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/kclient.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/kclient.yaml new file mode 100644 index 000000000..ecc705600 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/kclient.yaml @@ -0,0 +1,8 @@ +teuthology: + postmerge: + # Once we can make sure the distro kernels have included the newops fixes + # we can remove the is_kupstream() restriction. Since Nautilus only + # supports the 'v1' mount syntax, don't touch the mount syntax + # restriction. + - if not is_kupstream() or syntax_version() == 'v2' then reject() end +tasks: diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/1-client-sanity.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/1-client-sanity.yaml new file mode 100644 index 000000000..9508cce65 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/1-client-sanity.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_newops diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/% b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/% diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/0-client-upgrade.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/0-client-upgrade.yaml new file mode 100644 index 000000000..2d948af19 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/0-client-upgrade.yaml @@ -0,0 +1,14 @@ +teuthology: + postmerge: + - if not is_fuse() then reject() end +tasks: +- ceph-fuse: + client.0: + mounted: false +- print: "**** done unmount client.0" +- install.upgrade: + client.0: +- print: "**** done install.upgrade on client.0" +- ceph-fuse: + client.0: +- print: "**** done remount client.0" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/blogbench.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/blogbench.yaml new file mode 120000 index 000000000..a2f8b3052 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/blogbench.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/dbench.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/dbench.yaml new file mode 120000 index 000000000..9fb8adcea --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/dbench.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/fsstress.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/fsstress.yaml new file mode 120000 index 000000000..dc777f36d --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/fsstress.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/iozone.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/iozone.yaml new file mode 120000 index 000000000..f4d0ead4f --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/iozone.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/kernel_untar_build.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/kernel_untar_build.yaml new file mode 120000 index 000000000..317ebf8c4 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/kernel_untar_build.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/% b/qa/suites/fs/valgrind/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/valgrind/% diff --git a/qa/suites/fs/valgrind/.qa b/qa/suites/fs/valgrind/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/begin b/qa/suites/fs/valgrind/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/valgrind/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/centos_latest.yaml b/qa/suites/fs/valgrind/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/fs/valgrind/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/debug.yaml b/qa/suites/fs/valgrind/debug.yaml new file mode 100644 index 000000000..5eed99cbb --- /dev/null +++ b/qa/suites/fs/valgrind/debug.yaml @@ -0,0 +1,4 @@ +overrides: + install: + ceph: + debuginfo: true diff --git a/qa/suites/fs/valgrind/mirror/% b/qa/suites/fs/valgrind/mirror/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/% diff --git a/qa/suites/fs/valgrind/mirror/.qa b/qa/suites/fs/valgrind/mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/cephfs-mirror/.qa b/qa/suites/fs/valgrind/mirror/cephfs-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cephfs-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/cephfs-mirror/one-per-cluster.yaml b/qa/suites/fs/valgrind/mirror/cephfs-mirror/one-per-cluster.yaml new file mode 100644 index 000000000..4112a0af0 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cephfs-mirror/one-per-cluster.yaml @@ -0,0 +1,7 @@ +meta: +- desc: run one cephfs-mirror daemon on primary cluster + +tasks: +- cephfs-mirror: + client: client.mirror + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/qa/suites/fs/valgrind/mirror/clients/.qa b/qa/suites/fs/valgrind/mirror/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/clients/mirror.yaml b/qa/suites/fs/valgrind/mirror/clients/mirror.yaml new file mode 100644 index 000000000..1a68fea8b --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/clients/mirror.yaml @@ -0,0 +1,18 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + debug cephfs_mirror: 20 + log to stderr: false + # make these predictable + client.mirror: + admin socket: /var/run/ceph/cephfs-mirror.asok + pid file: /var/run/ceph/cephfs-mirror.pid +tasks: +- exec: + client.mirror: + - "sudo ceph auth caps client.mirror mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror_remote: + - "sudo ceph auth caps client.mirror_remote mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" diff --git a/qa/suites/fs/valgrind/mirror/cluster/.qa b/qa/suites/fs/valgrind/mirror/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/cluster/1-node.yaml b/qa/suites/fs/valgrind/mirror/cluster/1-node.yaml new file mode 100644 index 000000000..cae4aca5e --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cluster/1-node.yaml @@ -0,0 +1,17 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 5 mdss +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - mds.c + - mds.d + - mds.e + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 + - client.mirror + - client.mirror_remote diff --git a/qa/suites/fs/valgrind/mirror/mount/.qa b/qa/suites/fs/valgrind/mirror/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/mount/fuse.yaml b/qa/suites/fs/valgrind/mirror/mount/fuse.yaml new file mode 100644 index 000000000..1fdf55ab4 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/mount/fuse.yaml @@ -0,0 +1,2 @@ +tasks: + - ceph-fuse: [client.0, client.1] diff --git a/qa/suites/fs/valgrind/mirror/overrides/.qa b/qa/suites/fs/valgrind/mirror/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml new file mode 100644 index 000000000..d40fa4cb8 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/valgrind/mirror/tasks/.qa b/qa/suites/fs/valgrind/mirror/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/tasks/mirror.yaml b/qa/suites/fs/valgrind/mirror/tasks/mirror.yaml new file mode 100644 index 000000000..07c1e24ef --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/tasks/mirror.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mgr: + debug client: 10 + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_mirroring.TestMirroring diff --git a/qa/suites/fs/verify/% b/qa/suites/fs/verify/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/% diff --git a/qa/suites/fs/verify/.qa b/qa/suites/fs/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/begin b/qa/suites/fs/verify/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/verify/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/verify/clusters/.qa b/qa/suites/fs/verify/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/clusters/1a5s-mds-1c-client.yaml b/qa/suites/fs/verify/clusters/1a5s-mds-1c-client.yaml new file mode 120000 index 000000000..2ab33af1c --- /dev/null +++ b/qa/suites/fs/verify/clusters/1a5s-mds-1c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a5s-mds-1c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/conf b/qa/suites/fs/verify/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/verify/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/$ b/qa/suites/fs/verify/distro/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/distro/$ diff --git a/qa/suites/fs/verify/distro/.qa b/qa/suites/fs/verify/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/centos_8.yaml b/qa/suites/fs/verify/distro/centos_8.yaml new file mode 120000 index 000000000..380a1443b --- /dev/null +++ b/qa/suites/fs/verify/distro/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/centos_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/rhel_8.yaml b/qa/suites/fs/verify/distro/rhel_8.yaml new file mode 120000 index 000000000..133acf27b --- /dev/null +++ b/qa/suites/fs/verify/distro/rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/ubuntu/+ b/qa/suites/fs/verify/distro/ubuntu/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/+ diff --git a/qa/suites/fs/verify/distro/ubuntu/.qa b/qa/suites/fs/verify/distro/ubuntu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/ubuntu/latest.yaml b/qa/suites/fs/verify/distro/ubuntu/latest.yaml new file mode 120000 index 000000000..162964882 --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/latest.yaml @@ -0,0 +1 @@ +.qa/distros/all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/ubuntu/overrides.yaml b/qa/suites/fs/verify/distro/ubuntu/overrides.yaml new file mode 100644 index 000000000..fdd7f5e5a --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/overrides.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + valgrind: + exit_on_first_error: false diff --git a/qa/suites/fs/verify/mount/.qa b/qa/suites/fs/verify/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/fuse.yaml b/qa/suites/fs/verify/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/verify/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/+ b/qa/suites/fs/verify/mount/kclient/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/+ diff --git a/qa/suites/fs/verify/mount/kclient/.qa b/qa/suites/fs/verify/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/k-testing.yaml b/qa/suites/fs/verify/mount/kclient/k-testing.yaml new file mode 120000 index 000000000..bec80be29 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/k-testing.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/mount.yaml b/qa/suites/fs/verify/mount/kclient/mount.yaml new file mode 120000 index 000000000..9967f23e2 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/mount.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/mount.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/ms-die-on-skipped.yaml b/qa/suites/fs/verify/mount/kclient/ms-die-on-skipped.yaml new file mode 120000 index 000000000..1912a0c91 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/ms-die-on-skipped.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/objectstore-ec b/qa/suites/fs/verify/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/verify/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/+ b/qa/suites/fs/verify/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/overrides/+ diff --git a/qa/suites/fs/verify/overrides/.qa b/qa/suites/fs/verify/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/ignorelist_health.yaml b/qa/suites/fs/verify/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/verify/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/verify/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/verify/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/mon-debug.yaml b/qa/suites/fs/verify/overrides/mon-debug.yaml new file mode 100644 index 000000000..6ed3e6d52 --- /dev/null +++ b/qa/suites/fs/verify/overrides/mon-debug.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + mon: + debug ms: 1 + debug mon: 20 diff --git a/qa/suites/fs/verify/overrides/session_timeout.yaml b/qa/suites/fs/verify/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/verify/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/ranks/.qa b/qa/suites/fs/verify/ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/ranks/1.yaml b/qa/suites/fs/verify/ranks/1.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/ranks/1.yaml diff --git a/qa/suites/fs/verify/ranks/3.yaml b/qa/suites/fs/verify/ranks/3.yaml new file mode 100644 index 000000000..9ed043c14 --- /dev/null +++ b/qa/suites/fs/verify/ranks/3.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 3 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/verify/ranks/5.yaml b/qa/suites/fs/verify/ranks/5.yaml new file mode 100644 index 000000000..ed89cef3a --- /dev/null +++ b/qa/suites/fs/verify/ranks/5.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 5 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/verify/tasks/.qa b/qa/suites/fs/verify/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/tasks/dbench.yaml b/qa/suites/fs/verify/tasks/dbench.yaml new file mode 120000 index 000000000..b0f876c3c --- /dev/null +++ b/qa/suites/fs/verify/tasks/dbench.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/tasks/fsstress.yaml b/qa/suites/fs/verify/tasks/fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/verify/tasks/fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/validater/.qa b/qa/suites/fs/verify/validater/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/validater/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/validater/lockdep.yaml b/qa/suites/fs/verify/validater/lockdep.yaml new file mode 100644 index 000000000..25f84355c --- /dev/null +++ b/qa/suites/fs/verify/validater/lockdep.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + lockdep: true diff --git a/qa/suites/fs/verify/validater/valgrind.yaml b/qa/suites/fs/verify/validater/valgrind.yaml new file mode 100644 index 000000000..930872fc7 --- /dev/null +++ b/qa/suites/fs/verify/validater/valgrind.yaml @@ -0,0 +1,29 @@ +overrides: + install: + ceph: + debuginfo: true + ceph: + # Valgrind makes everything slow, so ignore slow requests and extend heartbeat grace + log-ignorelist: + - slow request + - SLOW_OPS + - MON_DOWN + conf: + global: + osd heartbeat grace: 60 + mds heartbeat grace: 60 + mds beacon grace: 60 + mds: + mds valgrind exit: true + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + mds: [--tool=memcheck] + watchdog: + daemon_restart: normal + ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/qa/suites/fs/volumes/% b/qa/suites/fs/volumes/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/volumes/% diff --git a/qa/suites/fs/volumes/.qa b/qa/suites/fs/volumes/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/begin b/qa/suites/fs/volumes/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/volumes/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/clusters/.qa b/qa/suites/fs/volumes/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/clusters/1a3s-mds-4c-client.yaml b/qa/suites/fs/volumes/clusters/1a3s-mds-4c-client.yaml new file mode 120000 index 000000000..5c722a30b --- /dev/null +++ b/qa/suites/fs/volumes/clusters/1a3s-mds-4c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-4c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/conf b/qa/suites/fs/volumes/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/volumes/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/volumes/distro b/qa/suites/fs/volumes/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/volumes/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/volumes/mount b/qa/suites/fs/volumes/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/volumes/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/objectstore/.qa b/qa/suites/fs/volumes/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/volumes/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/volumes/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/+ b/qa/suites/fs/volumes/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/volumes/overrides/+ diff --git a/qa/suites/fs/volumes/overrides/.qa b/qa/suites/fs/volumes/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/ignorelist_health.yaml b/qa/suites/fs/volumes/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/volumes/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/no_client_pidfile.yaml b/qa/suites/fs/volumes/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/.qa b/qa/suites/fs/volumes/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/volumes/% b/qa/suites/fs/volumes/tasks/volumes/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/% diff --git a/qa/suites/fs/volumes/tasks/volumes/.qa b/qa/suites/fs/volumes/tasks/volumes/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/volumes/overrides.yaml b/qa/suites/fs/volumes/tasks/volumes/overrides.yaml new file mode 100644 index 000000000..fb15e5079 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/overrides.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + mgr: + debug client: 20 + debug ms: 1 + debug finisher: 20 + debug mgr: 20 + log-ignorelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL diff --git a/qa/suites/fs/volumes/tasks/volumes/test/.qa b/qa/suites/fs/volumes/tasks/volumes/test/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/volumes/test/basic.yaml b/qa/suites/fs/volumes/tasks/volumes/test/basic.yaml new file mode 100644 index 000000000..b4c65cfc5 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/basic.yaml @@ -0,0 +1,8 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestVolumes + - tasks.cephfs.test_volumes.TestSubvolumeGroups + - tasks.cephfs.test_volumes.TestSubvolumes + - tasks.cephfs.test_subvolume.TestSubvolume diff --git a/qa/suites/fs/volumes/tasks/volumes/test/clone.yaml b/qa/suites/fs/volumes/tasks/volumes/test/clone.yaml new file mode 100644 index 000000000..e0c1f0150 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/clone.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestSubvolumeSnapshotClones diff --git a/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml new file mode 100644 index 000000000..ec8335fe0 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml @@ -0,0 +1,13 @@ +tasks: + - check-counter: + counters: + mgr: + - name: "finisher-volumes.complete_latency.avgcount" + min: 4 + - name: "finisher-volumes.queue_len" + expected_val: 0 + + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestPerModuleFinsherThread diff --git a/qa/suites/fs/volumes/tasks/volumes/test/misc.yaml b/qa/suites/fs/volumes/tasks/volumes/test/misc.yaml new file mode 100644 index 000000000..1f6fd2b2d --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/misc.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestMisc diff --git a/qa/suites/fs/volumes/tasks/volumes/test/snapshot.yaml b/qa/suites/fs/volumes/tasks/volumes/test/snapshot.yaml new file mode 100644 index 000000000..d68201137 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/snapshot.yaml @@ -0,0 +1,6 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestSubvolumeGroupSnapshots + - tasks.cephfs.test_volumes.TestSubvolumeSnapshots diff --git a/qa/suites/fs/workload/% b/qa/suites/fs/workload/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/% diff --git a/qa/suites/fs/workload/.qa b/qa/suites/fs/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/0-rhel_8.yaml b/qa/suites/fs/workload/0-rhel_8.yaml new file mode 120000 index 000000000..c9abcd7b8 --- /dev/null +++ b/qa/suites/fs/workload/0-rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/begin/+ b/qa/suites/fs/workload/begin/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/begin/+ diff --git a/qa/suites/fs/workload/begin/.qa b/qa/suites/fs/workload/begin/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/begin/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/begin/0-install.yaml b/qa/suites/fs/workload/begin/0-install.yaml new file mode 120000 index 000000000..3b1852973 --- /dev/null +++ b/qa/suites/fs/workload/begin/0-install.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/0-install.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/begin/1-cephadm.yaml b/qa/suites/fs/workload/begin/1-cephadm.yaml new file mode 100644 index 000000000..a58ea5725 --- /dev/null +++ b/qa/suites/fs/workload/begin/1-cephadm.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true +tasks: +- cephadm: + roleless: false +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- cephadm.shell: + mon.a: + - ceph fs dump + - ceph osd dump +- fs.ready: + timeout: 300 diff --git a/qa/suites/fs/workload/begin/2-logrotate.yaml b/qa/suites/fs/workload/begin/2-logrotate.yaml new file mode 120000 index 000000000..9d6e7ba83 --- /dev/null +++ b/qa/suites/fs/workload/begin/2-logrotate.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/2-logrotate.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/clusters/.qa b/qa/suites/fs/workload/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/clusters/1a11s-mds-1c-client-3node.yaml b/qa/suites/fs/workload/clusters/1a11s-mds-1c-client-3node.yaml new file mode 120000 index 000000000..884134573 --- /dev/null +++ b/qa/suites/fs/workload/clusters/1a11s-mds-1c-client-3node.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/conf b/qa/suites/fs/workload/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/workload/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/.qa b/qa/suites/fs/workload/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/fuse.yaml b/qa/suites/fs/workload/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/workload/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/% b/qa/suites/fs/workload/mount/kclient/% new file mode 100644 index 000000000..b8626c4cf --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/% @@ -0,0 +1 @@ +4 diff --git a/qa/suites/fs/workload/mount/kclient/.qa b/qa/suites/fs/workload/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/base b/qa/suites/fs/workload/mount/kclient/base new file mode 120000 index 000000000..22f94e150 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/base @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/.qa b/qa/suites/fs/workload/mount/kclient/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/crc.yaml b/qa/suites/fs/workload/mount/kclient/ms_mode/crc.yaml new file mode 100644 index 000000000..7efada467 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/crc.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["ms_mode=crc"] diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/legacy.yaml b/qa/suites/fs/workload/mount/kclient/ms_mode/legacy.yaml new file mode 100644 index 000000000..8a68a7756 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/legacy.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["ms_mode=legacy"] diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/secure.yaml b/qa/suites/fs/workload/mount/kclient/ms_mode/secure.yaml new file mode 100644 index 000000000..b4a4221d5 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/secure.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["ms_mode=secure"] diff --git a/qa/suites/fs/workload/mount/kclient/wsync/.qa b/qa/suites/fs/workload/mount/kclient/wsync/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/wsync/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/wsync/no.yaml b/qa/suites/fs/workload/mount/kclient/wsync/no.yaml new file mode 100644 index 000000000..1ed9e9953 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/wsync/no.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["nowsync"] diff --git a/qa/suites/fs/workload/mount/kclient/wsync/yes.yaml b/qa/suites/fs/workload/mount/kclient/wsync/yes.yaml new file mode 100644 index 000000000..2061bac11 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/wsync/yes.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["wsync"] diff --git a/qa/suites/fs/workload/objectstore-ec b/qa/suites/fs/workload/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/workload/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/workload/omap_limit/.qa b/qa/suites/fs/workload/omap_limit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/omap_limit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/omap_limit/10.yaml b/qa/suites/fs/workload/omap_limit/10.yaml new file mode 100644 index 000000000..eec30f681 --- /dev/null +++ b/qa/suites/fs/workload/omap_limit/10.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd_max_omap_entries_per_request: 10 diff --git a/qa/suites/fs/workload/omap_limit/10000.yaml b/qa/suites/fs/workload/omap_limit/10000.yaml new file mode 100644 index 000000000..c4bea5538 --- /dev/null +++ b/qa/suites/fs/workload/omap_limit/10000.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd_max_omap_entries_per_request: 10000 diff --git a/qa/suites/fs/workload/overrides/+ b/qa/suites/fs/workload/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/overrides/+ diff --git a/qa/suites/fs/workload/overrides/.qa b/qa/suites/fs/workload/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/cephsqlite-timeout.yaml b/qa/suites/fs/workload/overrides/cephsqlite-timeout.yaml new file mode 100644 index 000000000..5bd92554f --- /dev/null +++ b/qa/suites/fs/workload/overrides/cephsqlite-timeout.yaml @@ -0,0 +1,7 @@ +# increase lock renewal timeout: OSD stress from small clusters may cause +# spurious timeouts +overrides: + ceph: + conf: + mgr: + cephsqlite lock renewal timeout: 900000 diff --git a/qa/suites/fs/workload/overrides/frag.yaml b/qa/suites/fs/workload/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/workload/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/ignorelist_health.yaml b/qa/suites/fs/workload/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/workload/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/workload/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/workload/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/osd-asserts.yaml b/qa/suites/fs/workload/overrides/osd-asserts.yaml new file mode 120000 index 000000000..f290c749b --- /dev/null +++ b/qa/suites/fs/workload/overrides/osd-asserts.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/osd-asserts.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/session_timeout.yaml b/qa/suites/fs/workload/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/workload/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/ranks/.qa b/qa/suites/fs/workload/ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/ranks/1.yaml b/qa/suites/fs/workload/ranks/1.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/ranks/1.yaml diff --git a/qa/suites/fs/workload/ranks/multi/% b/qa/suites/fs/workload/ranks/multi/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/% diff --git a/qa/suites/fs/workload/ranks/multi/export-check.yaml b/qa/suites/fs/workload/ranks/multi/export-check.yaml new file mode 100644 index 000000000..80f210861 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/export-check.yaml @@ -0,0 +1,6 @@ +overrides: + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/workload/ranks/multi/n/3.yaml b/qa/suites/fs/workload/ranks/multi/n/3.yaml new file mode 100644 index 000000000..9e6bddce0 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/n/3.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 3 diff --git a/qa/suites/fs/workload/ranks/multi/n/5.yaml b/qa/suites/fs/workload/ranks/multi/n/5.yaml new file mode 100644 index 000000000..2265aa0f2 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/n/5.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 5 diff --git a/qa/suites/fs/workload/ranks/multi/replication/always.yaml b/qa/suites/fs/workload/ranks/multi/replication/always.yaml new file mode 100644 index 000000000..099ced663 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/replication/always.yaml @@ -0,0 +1,18 @@ +# To exercise lock/witness code paths more regularly, try to get all +# directories replicated. +overrides: + ceph: + conf: + mds: + mds_bal_replicate_threshold: 1 +# Note: dir_update is only sent by an MDS trying to replicate a dir. +# dir_update is always sent for root, so the count should be more than 2 + check-counter: + counters: + mds: + - + name: mds_cache.dir_update + min: 3 + - + name: mds_cache.dir_update_receipt + min: 3 diff --git a/qa/suites/fs/workload/ranks/multi/replication/default.yaml b/qa/suites/fs/workload/ranks/multi/replication/default.yaml new file mode 100644 index 000000000..272977942 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/replication/default.yaml @@ -0,0 +1 @@ +# Use default (8000) diff --git a/qa/suites/fs/workload/standby-replay.yaml b/qa/suites/fs/workload/standby-replay.yaml new file mode 100644 index 000000000..b47d312bf --- /dev/null +++ b/qa/suites/fs/workload/standby-replay.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + standby_replay: true diff --git a/qa/suites/fs/workload/tasks/% b/qa/suites/fs/workload/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/% diff --git a/qa/suites/fs/workload/tasks/.qa b/qa/suites/fs/workload/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/0-subvolume/$ b/qa/suites/fs/workload/tasks/0-subvolume/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/$ diff --git a/qa/suites/fs/workload/tasks/0-subvolume/.qa b/qa/suites/fs/workload/tasks/0-subvolume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml b/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated-and-quota.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated-and-quota.yaml new file mode 100644 index 000000000..7129e54b5 --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated-and-quota.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + subvols: + create: 2 + subvol_options: "--namespace-isolated --size 25000000000" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated.yaml new file mode 100644 index 000000000..2ac901fef --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + subvols: + create: 2 + subvol_options: "--namespace-isolated" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-no-extra-options.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-no-extra-options.yaml new file mode 100644 index 000000000..40f98c2bc --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-no-extra-options.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + subvols: + create: 2 + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-quota.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-quota.yaml new file mode 100644 index 000000000..6cda00d4a --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-quota.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + subvols: + create: 2 + subvol_options: "--size 25000000000" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/1-check-counter.yaml b/qa/suites/fs/workload/tasks/1-check-counter.yaml new file mode 100644 index 000000000..6339ddb7c --- /dev/null +++ b/qa/suites/fs/workload/tasks/1-check-counter.yaml @@ -0,0 +1,2 @@ +tasks: +- check-counter: {} diff --git a/qa/suites/fs/workload/tasks/2-scrub/.qa b/qa/suites/fs/workload/tasks/2-scrub/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/2-scrub/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/2-scrub/no.yaml b/qa/suites/fs/workload/tasks/2-scrub/no.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/2-scrub/no.yaml diff --git a/qa/suites/fs/workload/tasks/2-scrub/yes.yaml b/qa/suites/fs/workload/tasks/2-scrub/yes.yaml new file mode 100644 index 000000000..52978a2bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/2-scrub/yes.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - slow metadata IO + - SLOW_OPS + - slow request +tasks: +- fwd_scrub: + scrub_timeout: 900 + sleep_between_iterations: 1 diff --git a/qa/suites/fs/workload/tasks/3-snaps/.qa b/qa/suites/fs/workload/tasks/3-snaps/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/3-snaps/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/3-snaps/no.yaml b/qa/suites/fs/workload/tasks/3-snaps/no.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/3-snaps/no.yaml diff --git a/qa/suites/fs/workload/tasks/3-snaps/yes.yaml b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml new file mode 100644 index 000000000..598f7e215 --- /dev/null +++ b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + conf: + mgr: + debug cephsqlite: 20 + check-counter: + counters: + mds: + - mds.root_rsnaps + - mds_server.req_mksnap_latency.avgcount + - mds_server.req_rmsnap_latency.avgcount +tasks: +- exec: + mon.a: + - ceph mgr module enable snap_schedule + - ceph config set mgr mgr/snap_schedule/allow_m_granularity true + - ceph config set mgr mgr/snap_schedule/dump_on_update true + - ceph fs snap-schedule add --fs=cephfs --path=/ --snap_schedule=1M + - ceph fs snap-schedule retention add --fs=cephfs --path=/ --retention-spec-or-period=6M3h + - ceph fs snap-schedule status --fs=cephfs --path=/ + - ceph fs snap-schedule list --fs=cephfs --path=/ --recursive=true + - date +%s > START_TIME +- full_sequential_finally: + - exec: + mon.a: + # Ensure that we have some snaps which get deleted (so check-counters does not fail) + - date +%s > END_TIME + - START_TIME=$(cat START_TIME); END_TIME=$(cat END_TIME); DIFF_TIME=$((600-(END_TIME-START_TIME))); if [ "$DIFF_TIME" -gt 0 ]; then sleep "$DIFF_TIME"; fi + - ceph fs snap-schedule status --fs=cephfs --path=/ + - ceph fs snap-schedule list --fs=cephfs --path=/ --recursive=true diff --git a/qa/suites/fs/workload/tasks/4-flush/.qa b/qa/suites/fs/workload/tasks/4-flush/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/4-flush/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/4-flush/no.yaml b/qa/suites/fs/workload/tasks/4-flush/no.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/4-flush/no.yaml diff --git a/qa/suites/fs/workload/tasks/4-flush/yes.yaml b/qa/suites/fs/workload/tasks/4-flush/yes.yaml new file mode 100644 index 000000000..4a3f7a11c --- /dev/null +++ b/qa/suites/fs/workload/tasks/4-flush/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- background_exec: + mon.a: + - while sleep 13; do ceph tell mds.cephfs:0 flush journal; done diff --git a/qa/suites/fs/workload/tasks/5-workunit/.qa b/qa/suites/fs/workload/tasks/5-workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/5-workunit/direct_io.yaml b/qa/suites/fs/workload/tasks/5-workunit/direct_io.yaml new file mode 100644 index 000000000..6c6ea1422 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/direct_io.yaml @@ -0,0 +1,9 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - direct_io + diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/.qa b/qa/suites/fs/workload/tasks/5-workunit/fs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/misc.yaml b/qa/suites/fs/workload/tasks/5-workunit/fs/misc.yaml new file mode 100644 index 000000000..4a9d0b4c5 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/misc.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - fs/misc diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/norstats.yaml b/qa/suites/fs/workload/tasks/5-workunit/fs/norstats.yaml new file mode 100644 index 000000000..d48df5471 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/norstats.yaml @@ -0,0 +1,17 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - fs/norstats +overrides: + kclient: + rbytes: false + ceph: + conf: + client: + client dirsize rbytes: false diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/test_o_trunc.yaml b/qa/suites/fs/workload/tasks/5-workunit/fs/test_o_trunc.yaml new file mode 100644 index 000000000..7b2b7c536 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/test_o_trunc.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - fs/test_o_trunc.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml b/qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml new file mode 100644 index 000000000..602d34162 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/postgres.yaml b/qa/suites/fs/workload/tasks/5-workunit/postgres.yaml new file mode 100644 index 000000000..7e71dbc88 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/postgres.yaml @@ -0,0 +1,36 @@ +# I would expect setting the context on the postgresql database directories +# would correctly trickle down to the files created by the postgresql daemon, +# but this does not appear to work. I would still see denials like: + +# type=AVC msg=audit(1655861665.521:21354): avc: denied { create } for pid=131994 comm="postmaster" name="replorigin_checkpoint.tmp" scontext=system_u:system_r:postgresql_t:s0 tcontext=system_u:object_r:cephfs_t:s0 tclass=file permissive=1' + +# Instead, we'll just set the context for the mount and be done with it. I've +# left in the context setting for the directories below. + +overrides: + ceph-fuse: + client.0: + mountpoint: /tmp/cephfs + mntopts: ["context=system_u:object_r:postgresql_db_t:s0"] + kclient: + client.0: + mountpoint: /tmp/cephfs + mntopts: ["context=system_u:object_r:postgresql_db_t:s0"] +tasks: +- exec: + client.0: + - sudo ls -l /tmp/cephfs/ && sudo df -h /tmp/cephfs/ + - sudo mkdir -m 755 --context=system_u:system_r:postgresql_t:s0 /tmp/cephfs/postgres && sudo chown postgres:postgres /tmp/cephfs/postgres + - sudo -u postgres -- mkdir -m 700 --context=system_u:system_r:postgresql_t:s0 /tmp/cephfs/postgres/data + - sudo semanage fcontext -a -t postgresql_db_t "/tmp/cephfs/postgres(/.*)?" 
+ - sudo grep -i postgresql /etc/selinux/targeted/contexts/files/file_contexts.local + - sudo restorecon -R -v /tmp/cephfs/postgres + - sudo ls -lZaR /tmp/cephfs/postgres/ + - sudo mkdir -p /etc/systemd/system/postgresql.service.d/ && printf '[Service]\nEnvironment=PGDATA=/tmp/cephfs/postgres/data\nEnvironment=PGLOG=/tmp/cephfs/postgres/pgstartup.log\n' | sudo tee /etc/systemd/system/postgresql.service.d/env.conf + - sudo -u postgres -- postgresql-setup --initdb + - sudo ls -lZaR /tmp/cephfs/postgres/ + - sudo systemctl start postgresql + - sudo -u postgres -- pgbench -s 32 -i + - sudo -u postgres -- pgbench -c 100 -j 4 --progress=5 --time=900 + - sudo systemctl stop postgresql + - sudo ls -lZaR /tmp/cephfs/postgres/ diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/.qa b/qa/suites/fs/workload/tasks/5-workunit/suites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml new file mode 100644 index 000000000..4c4bf2f9e --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml new file mode 100644 index 000000000..41b2bc8ed --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/ffsb.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/ffsb.yaml new file mode 100644 index 000000000..9bc925ab0 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/ffsb.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 + osd heartbeat grace: 60 + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml new file mode 100644 index 000000000..bae220292 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/fsx.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/fsx.yaml new file mode 100644 index 000000000..12d456cf4 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/fsx.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/fsync-tester.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/fsync-tester.yaml new file mode 100644 index 000000000..9aaf6d0c4 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/fsync-tester.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/iogen.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/iogen.yaml new file mode 100644 index 000000000..b8beb9e97 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/iogen.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - suites/iogen.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml new file mode 100644 index 000000000..bf5fd25b3 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/pjd.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/pjd.yaml new file mode 100644 index 000000000..37e315f7e --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/pjd.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true + fuse default permissions: 
false +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/hadoop/.qa b/qa/suites/hadoop/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/% b/qa/suites/hadoop/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/hadoop/basic/% diff --git a/qa/suites/hadoop/basic/.qa b/qa/suites/hadoop/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/clusters/.qa b/qa/suites/hadoop/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/clusters/fixed-3.yaml b/qa/suites/hadoop/basic/clusters/fixed-3.yaml new file mode 100644 index 000000000..56b0be4cf --- /dev/null +++ b/qa/suites/hadoop/basic/clusters/fixed-3.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client: + client permissions: false +roles: +- [mon.0, mds.a, osd.0, hadoop.master.0] +- [mon.1, mgr.x, osd.1, hadoop.slave.0] +- [mon.2, mgr.y, hadoop.slave.1, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/hadoop/basic/distros/.qa b/qa/suites/hadoop/basic/distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/distros/ubuntu_latest.yaml b/qa/suites/hadoop/basic/distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/hadoop/basic/distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/tasks/.qa b/qa/suites/hadoop/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/tasks/repl.yaml b/qa/suites/hadoop/basic/tasks/repl.yaml new file mode 100644 index 000000000..60cdcca32 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/repl.yaml @@ -0,0 +1,8 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/repl.sh] diff --git a/qa/suites/hadoop/basic/tasks/terasort.yaml b/qa/suites/hadoop/basic/tasks/terasort.yaml new file mode 100644 index 000000000..4377894f5 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/terasort.yaml @@ -0,0 +1,10 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/terasort.sh] + env: + NUM_RECORDS: "10000000" diff --git a/qa/suites/hadoop/basic/tasks/wordcount.yaml b/qa/suites/hadoop/basic/tasks/wordcount.yaml new file mode 100644 index 000000000..b84941b81 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/wordcount.yaml @@ -0,0 +1,8 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/wordcount.sh] diff --git a/qa/suites/krbd/.qa b/qa/suites/krbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/% b/qa/suites/krbd/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/basic/% diff --git a/qa/suites/krbd/basic/.qa b/qa/suites/krbd/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/bluestore-bitmap.yaml b/qa/suites/krbd/basic/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/basic/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ceph/.qa b/qa/suites/krbd/basic/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ceph/ceph.yaml b/qa/suites/krbd/basic/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/basic/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/basic/clusters/.qa b/qa/suites/krbd/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/clusters/fixed-1.yaml b/qa/suites/krbd/basic/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/krbd/basic/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/basic/conf.yaml b/qa/suites/krbd/basic/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/basic/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/basic/ms_mode/.qa b/qa/suites/krbd/basic/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ms_mode/crc$/.qa b/qa/suites/krbd/basic/ms_mode/crc$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/crc$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ms_mode/crc$/crc-rxbounce.yaml b/qa/suites/krbd/basic/ms_mode/crc$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/crc$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/basic/ms_mode/crc$/crc.yaml b/qa/suites/krbd/basic/ms_mode/crc$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/crc$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/basic/ms_mode/legacy$/.qa b/qa/suites/krbd/basic/ms_mode/legacy$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/legacy$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ms_mode/legacy$/legacy-rxbounce.yaml b/qa/suites/krbd/basic/ms_mode/legacy$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/legacy$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/basic/ms_mode/legacy$/legacy.yaml b/qa/suites/krbd/basic/ms_mode/legacy$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/legacy$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/basic/ms_mode/secure.yaml b/qa/suites/krbd/basic/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/basic/tasks/.qa b/qa/suites/krbd/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/tasks/krbd_deep_flatten.yaml b/qa/suites/krbd/basic/tasks/krbd_deep_flatten.yaml new file mode 100644 index 000000000..a821b73a3 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_deep_flatten.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_deep_flatten.t diff --git a/qa/suites/krbd/basic/tasks/krbd_discard.yaml b/qa/suites/krbd/basic/tasks/krbd_discard.yaml new file mode 100644 index 000000000..59ec5b943 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_discard.yaml @@ -0,0 +1,9 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_discard.t + - qa/rbd/krbd_discard_512b.t + - qa/rbd/krbd_discard_4M.t + - qa/rbd/krbd_zeroout.t + - qa/rbd/krbd_discard_granularity.t diff --git a/qa/suites/krbd/basic/tasks/krbd_huge_image.yaml b/qa/suites/krbd/basic/tasks/krbd_huge_image.yaml new file mode 100644 index 000000000..15ff033c7 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_huge_image.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_huge_image.t diff --git a/qa/suites/krbd/basic/tasks/krbd_modprobe.yaml b/qa/suites/krbd/basic/tasks/krbd_modprobe.yaml new file mode 100644 index 000000000..22f02cd40 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_modprobe.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_modprobe.t diff --git a/qa/suites/krbd/basic/tasks/krbd_msgr_segments.yaml b/qa/suites/krbd/basic/tasks/krbd_msgr_segments.yaml new file mode 100644 index 000000000..cfa524e70 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_msgr_segments.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_msgr_segments.t diff --git a/qa/suites/krbd/basic/tasks/krbd_parent_overlap.yaml b/qa/suites/krbd/basic/tasks/krbd_parent_overlap.yaml new file mode 100644 index 000000000..9bcf1fa35 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_parent_overlap.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_parent_overlap.t diff --git a/qa/suites/krbd/basic/tasks/krbd_read_only.yaml b/qa/suites/krbd/basic/tasks/krbd_read_only.yaml new file mode 100644 index 000000000..8194b89ce --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_read_only.yaml @@ -0,0 +1,6 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_blkroset.t + - qa/rbd/krbd_get_features.t diff --git a/qa/suites/krbd/basic/tasks/krbd_whole_object_zeroout.yaml b/qa/suites/krbd/basic/tasks/krbd_whole_object_zeroout.yaml new file mode 100644 index 000000000..3b0ff8d1e --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_whole_object_zeroout.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_whole_object_zeroout.t diff --git a/qa/suites/krbd/fsx/% b/qa/suites/krbd/fsx/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/fsx/% diff --git a/qa/suites/krbd/fsx/.qa b/qa/suites/krbd/fsx/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/ceph/.qa b/qa/suites/krbd/fsx/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/ceph/ceph.yaml b/qa/suites/krbd/fsx/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/fsx/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/fsx/clusters/.qa b/qa/suites/krbd/fsx/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/clusters/3-node.yaml b/qa/suites/krbd/fsx/clusters/3-node.yaml new file mode 100644 index 000000000..0433ec9be --- /dev/null +++ b/qa/suites/krbd/fsx/clusters/3-node.yaml @@ -0,0 +1,14 @@ +# fixed-3.yaml, but with two additional clients on the same target +roles: +- [mon.a, mon.c, mgr.x, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7] +- [client.0, client.1, client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/krbd/fsx/conf.yaml b/qa/suites/krbd/fsx/conf.yaml new file mode 100644 index 000000000..eb6d72a80 --- /dev/null +++ b/qa/suites/krbd/fsx/conf.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false diff --git a/qa/suites/krbd/fsx/features/.qa b/qa/suites/krbd/fsx/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/features/no-object-map.yaml b/qa/suites/krbd/fsx/features/no-object-map.yaml new file mode 100644 index 000000000..809c77093 --- /dev/null +++ b/qa/suites/krbd/fsx/features/no-object-map.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + # layering, deep-flatten + rbd default features: 33 diff --git a/qa/suites/krbd/fsx/features/object-map.yaml b/qa/suites/krbd/fsx/features/object-map.yaml new file mode 100644 index 000000000..35e7e9d0b --- /dev/null +++ b/qa/suites/krbd/fsx/features/object-map.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + # layering, exclusive-lock, object-map, fast-diff, deep-flatten + rbd default features: 61 diff --git a/qa/suites/krbd/fsx/ms_mode$/.qa b/qa/suites/krbd/fsx/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/fsx/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..fb9c3dec2 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/crc.yaml b/qa/suites/krbd/fsx/ms_mode$/crc.yaml new file mode 100644 index 000000000..d11be3887 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/fsx/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..3306c1e57 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/legacy.yaml b/qa/suites/krbd/fsx/ms_mode$/legacy.yaml new file mode 100644 index 000000000..2b7116c03 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/secure.yaml b/qa/suites/krbd/fsx/ms_mode$/secure.yaml new file mode 100644 index 000000000..671b73f9c --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/objectstore/.qa b/qa/suites/krbd/fsx/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/objectstore/bluestore-bitmap.yaml b/qa/suites/krbd/fsx/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/fsx/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/.qa b/qa/suites/krbd/fsx/striping/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/default/% b/qa/suites/krbd/fsx/striping/default/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/% diff --git a/qa/suites/krbd/fsx/striping/default/.qa b/qa/suites/krbd/fsx/striping/default/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/default/msgr-failures/.qa b/qa/suites/krbd/fsx/striping/default/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml b/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml b/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/fsx/striping/default/randomized-striping-off.yaml b/qa/suites/krbd/fsx/striping/default/randomized-striping-off.yaml new file mode 100644 index 000000000..0bf96a8d0 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/randomized-striping-off.yaml @@ -0,0 +1,3 @@ +overrides: + rbd_fsx: + randomized_striping: false diff --git a/qa/suites/krbd/fsx/striping/fancy/% b/qa/suites/krbd/fsx/striping/fancy/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/% diff --git a/qa/suites/krbd/fsx/striping/fancy/.qa b/qa/suites/krbd/fsx/striping/fancy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/fancy/msgr-failures/.qa b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/fsx/striping/fancy/randomized-striping-on.yaml b/qa/suites/krbd/fsx/striping/fancy/randomized-striping-on.yaml new file mode 100644 index 000000000..c2823e4ed --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/randomized-striping-on.yaml @@ -0,0 +1,3 @@ +overrides: + rbd_fsx: + randomized_striping: true diff --git a/qa/suites/krbd/fsx/tasks/.qa b/qa/suites/krbd/fsx/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/tasks/fsx-1-client.yaml b/qa/suites/krbd/fsx/tasks/fsx-1-client.yaml new file mode 100644 index 000000000..b0af9829f --- /dev/null +++ b/qa/suites/krbd/fsx/tasks/fsx-1-client.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 20000 + krbd: true + readbdy: 512 + writebdy: 512 + truncbdy: 512 + holebdy: 512 + punch_holes: true diff --git a/qa/suites/krbd/fsx/tasks/fsx-3-client.yaml b/qa/suites/krbd/fsx/tasks/fsx-3-client.yaml new file mode 100644 index 000000000..5b8e37012 --- /dev/null +++ b/qa/suites/krbd/fsx/tasks/fsx-3-client.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0, client.1, client.2] + ops: 10000 + krbd: true + readbdy: 512 + writebdy: 512 + truncbdy: 512 + holebdy: 512 + punch_holes: true diff --git a/qa/suites/krbd/ms_modeless/% b/qa/suites/krbd/ms_modeless/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/ms_modeless/% diff --git a/qa/suites/krbd/ms_modeless/.qa b/qa/suites/krbd/ms_modeless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/bluestore-bitmap.yaml b/qa/suites/krbd/ms_modeless/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/ceph/.qa b/qa/suites/krbd/ms_modeless/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/ceph/ceph.yaml b/qa/suites/krbd/ms_modeless/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/ms_modeless/clusters/.qa b/qa/suites/krbd/ms_modeless/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/clusters/fixed-3.yaml b/qa/suites/krbd/ms_modeless/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/conf.yaml b/qa/suites/krbd/ms_modeless/conf.yaml new file mode 100644 index 000000000..eb6d72a80 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/conf.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false diff --git a/qa/suites/krbd/ms_modeless/tasks/.qa b/qa/suites/krbd/ms_modeless/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/tasks/krbd_default_map_options.yaml b/qa/suites/krbd/ms_modeless/tasks/krbd_default_map_options.yaml new file mode 100644 index 000000000..c8c12f173 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/tasks/krbd_default_map_options.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_default_map_options.t diff --git a/qa/suites/krbd/ms_modeless/tasks/krbd_rxbounce.yaml b/qa/suites/krbd/ms_modeless/tasks/krbd_rxbounce.yaml new file mode 100644 index 000000000..4ecd0e83e --- /dev/null +++ b/qa/suites/krbd/ms_modeless/tasks/krbd_rxbounce.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_rxbounce.sh diff --git a/qa/suites/krbd/rbd-nomount/% b/qa/suites/krbd/rbd-nomount/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/% diff --git a/qa/suites/krbd/rbd-nomount/.qa b/qa/suites/krbd/rbd-nomount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/bluestore-bitmap.yaml b/qa/suites/krbd/rbd-nomount/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/clusters/.qa b/qa/suites/krbd/rbd-nomount/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml b/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/conf.yaml b/qa/suites/krbd/rbd-nomount/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/rbd-nomount/install/.qa b/qa/suites/krbd/rbd-nomount/install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/install/ceph.yaml b/qa/suites/krbd/rbd-nomount/install/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/install/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/.qa b/qa/suites/krbd/rbd-nomount/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/crc$/.qa b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc-rxbounce.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/.qa b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy-rxbounce.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/.qa b/qa/suites/krbd/rbd-nomount/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml b/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml b/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd-nomount/tasks/.qa b/qa/suites/krbd/rbd-nomount/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_data_pool.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_data_pool.yaml new file mode 100644 index 000000000..35b9d67eb --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_data_pool.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_data_pool.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_exclusive_option.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_exclusive_option.yaml new file mode 100644 index 000000000..567deebfd --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_exclusive_option.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_exclusive_option.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_fallocate.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_fallocate.yaml new file mode 100644 index 000000000..a72869824 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_fallocate.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_fallocate.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_huge_osdmap.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_huge_osdmap.yaml new file mode 100644 index 000000000..3148b32a1 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_huge_osdmap.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mon: + mon max osd: 60000 +tasks: +- workunit: + clients: + all: + - rbd/krbd_huge_osdmap.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_latest_osdmap_on_map.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_latest_osdmap_on_map.yaml new file mode 100644 index 000000000..522be6a42 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_latest_osdmap_on_map.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_latest_osdmap_on_map.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_namespaces.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_namespaces.yaml new file mode 100644 index 000000000..4d6519a25 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_namespaces.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_namespaces.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml new file mode 100644 index 000000000..c326507ac --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_enumerate.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml new file mode 100644 index 000000000..ed1b2ae63 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - pauserd,pausewr flag\(s\) set + +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_netlink_enobufs.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netns.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netns.yaml new file mode 100644 index 000000000..21e06e388 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netns.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_netns.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_symlinks.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_symlinks.yaml new file mode 100644 index 000000000..ee79932f5 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_symlinks.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - 
rbd/krbd_udev_symlinks.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml new file mode 100644 index 000000000..675b98e73 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + all: + - rbd/concurrent.sh +# Options for rbd/concurrent.sh (default values shown) +# env: +# RBD_CONCURRENT_ITER: 100 +# RBD_CONCURRENT_COUNT: 5 +# RBD_CONCURRENT_DELAY: 5 diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml new file mode 100644 index 000000000..ea421eec1 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/huge-tickets.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml new file mode 100644 index 000000000..e5017e118 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml @@ -0,0 +1,15 @@ +tasks: +- workunit: + clients: + all: + - rbd/image_read.sh +# Options for rbd/image_read.sh (default values shown) +# env: +# IMAGE_READ_LOCAL_FILES: 'false' +# IMAGE_READ_FORMAT: '2' +# IMAGE_READ_VERBOSE: 'true' +# IMAGE_READ_PAGE_SIZE: '4096' +# IMAGE_READ_OBJECT_ORDER: '22' +# IMAGE_READ_TEST_CLONES: 'true' +# IMAGE_READ_DOUBLE_ORDER: 'true' +# IMAGE_READ_HALF_ORDER: 'false' diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml new file mode 100644 index 000000000..aa155827c --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/kernel.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml new file mode 100644 index 000000000..c1529398b --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/map-snapshot-io.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml new file mode 100644 index 000000000..c2160997c --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/map-unmap.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml new file mode 100644 index 000000000..c493cfaf4 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + all: + - rbd/simple_big.sh + diff --git a/qa/suites/krbd/rbd/% b/qa/suites/krbd/rbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/rbd/% diff --git a/qa/suites/krbd/rbd/.qa b/qa/suites/krbd/rbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/bluestore-bitmap.yaml b/qa/suites/krbd/rbd/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/rbd/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/clusters/.qa b/qa/suites/krbd/rbd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/clusters/fixed-3.yaml b/qa/suites/krbd/rbd/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/rbd/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/conf.yaml b/qa/suites/krbd/rbd/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/rbd/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/rbd/ms_mode/.qa b/qa/suites/krbd/rbd/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/ms_mode/crc$/.qa b/qa/suites/krbd/rbd/ms_mode/crc$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/crc$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/ms_mode/crc$/crc-rxbounce.yaml b/qa/suites/krbd/rbd/ms_mode/crc$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/crc$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/rbd/ms_mode/crc$/crc.yaml b/qa/suites/krbd/rbd/ms_mode/crc$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/crc$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/rbd/ms_mode/legacy$/.qa b/qa/suites/krbd/rbd/ms_mode/legacy$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/legacy$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/ms_mode/legacy$/legacy-rxbounce.yaml b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/rbd/ms_mode/legacy$/legacy.yaml b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/rbd/ms_mode/secure.yaml b/qa/suites/krbd/rbd/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/rbd/msgr-failures/.qa b/qa/suites/krbd/rbd/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/msgr-failures/few.yaml b/qa/suites/krbd/rbd/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd/msgr-failures/many.yaml b/qa/suites/krbd/rbd/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd/tasks/.qa b/qa/suites/krbd/rbd/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/tasks/rbd_fio.yaml b/qa/suites/krbd/rbd/tasks/rbd_fio.yaml new file mode 100644 index 000000000..01088fa46 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_fio.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: null +- rbd_fio: + client.0: + fio-io-size: 90% + formats: [2] + features: [[layering,exclusive-lock]] + io-engine: sync + rw: randrw + runtime: 900 diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml new file mode 100644 index 000000000..699cde82c --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml @@ -0,0 +1,12 @@ +tasks: +- install: + extra_system_packages: + deb: ['bison', 'flex', 'libelf-dev', 'libssl-dev'] + rpm: ['bison', 'flex', 'elfutils-libelf-devel', 'openssl-devel'] +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml new file mode 100644 index 000000000..d779eea23 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..5204bb87f --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..f9d62fefc --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml new file mode 100644 index 000000000..f765b74a6 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + fs_type: ext4 +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml new file mode 100644 index 000000000..39d4e04f2 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml @@ -0,0 +1,20 @@ +tasks: +- install: + extra_system_packages: + deb: + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml new file mode 100644 index 000000000..eb8f18d60 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml new file mode 100644 index 
000000000..7c2796b2a --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/krbd/singleton-msgr-failures/% b/qa/suites/krbd/singleton-msgr-failures/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/% diff --git a/qa/suites/krbd/singleton-msgr-failures/.qa b/qa/suites/krbd/singleton-msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/bluestore-bitmap.yaml b/qa/suites/krbd/singleton-msgr-failures/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/conf.yaml b/qa/suites/krbd/singleton-msgr-failures/conf.yaml new file mode 100644 index 000000000..5e7ed992e --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/conf.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/.qa b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/secure.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/singleton-msgr-failures/msgr-failures/.qa b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/msgr-failures/few.yaml b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/singleton-msgr-failures/msgr-failures/many.yaml b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/singleton-msgr-failures/tasks/.qa b/qa/suites/krbd/singleton-msgr-failures/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/tasks/rbd_xfstests.yaml b/qa/suites/krbd/singleton-msgr-failures/tasks/rbd_xfstests.yaml new file mode 100644 index 000000000..c94af0207 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/tasks/rbd_xfstests.yaml @@ -0,0 +1,38 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: +- rbd.xfstests: + client.0: &ref + test_image: 'test_image-0' + test_size: 5120 # MB + scratch_image: 'scratch_image-0' + scratch_size: 15360 # MB + fs_type: ext4 + tests: '-g auto -g blockdev -x clone' + exclude: + - generic/042 + - generic/392 + - generic/044 + - generic/045 + - generic/046 + - generic/223 + - ext4/002 # removed upstream + - ext4/304 + - generic/388 + - generic/405 + - generic/422 + - shared/298 # lockdep false positive + randomize: true + client.1: + <<: *ref + test_image: 'test_image-1' + scratch_image: 'scratch_image-1' diff --git a/qa/suites/krbd/singleton/% b/qa/suites/krbd/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/singleton/% diff --git a/qa/suites/krbd/singleton/.qa b/qa/suites/krbd/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/bluestore-bitmap.yaml b/qa/suites/krbd/singleton/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/singleton/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/conf.yaml b/qa/suites/krbd/singleton/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/singleton/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/singleton/ms_mode$/.qa b/qa/suites/krbd/singleton/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/singleton/ms_mode$/crc.yaml b/qa/suites/krbd/singleton/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/singleton/ms_mode$/legacy.yaml b/qa/suites/krbd/singleton/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/singleton/ms_mode$/secure.yaml b/qa/suites/krbd/singleton/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/singleton/tasks/.qa b/qa/suites/krbd/singleton/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml b/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml new file mode 100644 index 000000000..5e30ef2ba --- /dev/null +++ b/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + global: + osd pool default size: 1 + osd: + osd shutdown pgref assert: true +roles: +- [mon.a, mgr.x, osd.0, client.0] + +tasks: +- install: + extra_system_packages: + - fio +- ceph: +- workunit: + clients: + all: + - rbd/krbd_watch_errors.sh diff --git a/qa/suites/krbd/thrash/% b/qa/suites/krbd/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/thrash/% diff --git a/qa/suites/krbd/thrash/.qa b/qa/suites/krbd/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/bluestore-bitmap.yaml b/qa/suites/krbd/thrash/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/thrash/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/ceph/.qa b/qa/suites/krbd/thrash/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/ceph/ceph.yaml b/qa/suites/krbd/thrash/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/thrash/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/thrash/clusters/.qa b/qa/suites/krbd/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/clusters/fixed-3.yaml b/qa/suites/krbd/thrash/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/thrash/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/conf.yaml b/qa/suites/krbd/thrash/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/thrash/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/thrash/ms_mode$/.qa b/qa/suites/krbd/thrash/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/thrash/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/thrash/ms_mode$/crc.yaml b/qa/suites/krbd/thrash/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/thrash/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/thrash/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/thrash/ms_mode$/legacy.yaml b/qa/suites/krbd/thrash/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/thrash/ms_mode$/secure.yaml b/qa/suites/krbd/thrash/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/thrash/thrashers/.qa b/qa/suites/krbd/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/thrashers/backoff.yaml b/qa/suites/krbd/thrash/thrashers/backoff.yaml new file mode 100644 index 000000000..a98fec611 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/backoff.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true + log-ignorelist: + - wrongly marked me down + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml b/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml new file mode 100644 index 000000000..4ef5fcaea --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 diff --git a/qa/suites/krbd/thrash/thrashers/pggrow.yaml b/qa/suites/krbd/thrash/thrashers/pggrow.yaml new file mode 100644 index 000000000..07a227325 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/pggrow.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/krbd/thrash/thrashers/upmap.yaml b/qa/suites/krbd/thrash/thrashers/upmap.yaml new file mode 100644 index 000000000..f7d456627 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/upmap.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + log-ignorelist: + - wrongly marked me down + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_thrash_pg_upmap: 3 + chance_thrash_pg_upmap_items: 3 diff --git a/qa/suites/krbd/thrash/thrashosds-health.yaml b/qa/suites/krbd/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/krbd/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/workloads/.qa b/qa/suites/krbd/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/workloads/krbd_diff_continuous.yaml b/qa/suites/krbd/thrash/workloads/krbd_diff_continuous.yaml new file mode 100644 index 000000000..5907718d5 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/krbd_diff_continuous.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + all: + - rbd/diff_continuous.sh + env: + RBD_DEVICE_TYPE: "krbd" diff --git a/qa/suites/krbd/thrash/workloads/rbd_fio.yaml b/qa/suites/krbd/thrash/workloads/rbd_fio.yaml new file mode 100644 index 000000000..157210f53 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/rbd_fio.yaml @@ -0,0 +1,11 @@ +tasks: +- rbd_fio: + client.0: + fio-io-size: 100% + formats: [2] + features: [[layering,exclusive-lock]] + io-engine: libaio + rw: randrw + bs: 1024 + io-depth: 256 + runtime: 1200 diff --git a/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..4ae7d6909 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml @@ -0,0 +1,8 @@ +tasks: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/krbd/unmap/% b/qa/suites/krbd/unmap/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/unmap/% diff --git a/qa/suites/krbd/unmap/.qa b/qa/suites/krbd/unmap/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/ceph/.qa b/qa/suites/krbd/unmap/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/ceph/ceph.yaml b/qa/suites/krbd/unmap/ceph/ceph.yaml new file mode 100644 index 000000000..c33664028 --- /dev/null +++ b/qa/suites/krbd/unmap/ceph/ceph.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + crush_tunables: bobtail + mon_bind_addrvec: false + mon_bind_msgr2: false + conf: + global: + cephx require version: 1 + cephx service require version: 1 + ms bind msgr2: false +tasks: +- install: +- ceph: +- exec: + client.0: + - "ceph osd getcrushmap -o /dev/stdout | crushtool -d - | sed -e 's/alg straw2/alg straw/g' | crushtool -c /dev/stdin -o /dev/stdout | ceph osd setcrushmap -i /dev/stdin" diff --git a/qa/suites/krbd/unmap/clusters/.qa b/qa/suites/krbd/unmap/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/clusters/separate-client.yaml b/qa/suites/krbd/unmap/clusters/separate-client.yaml new file mode 100644 index 000000000..be1343189 --- /dev/null +++ b/qa/suites/krbd/unmap/clusters/separate-client.yaml @@ -0,0 +1,16 @@ +# fixed-1.yaml, but with client.0 on a separate target +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/krbd/unmap/conf.yaml b/qa/suites/krbd/unmap/conf.yaml new file mode 100644 index 000000000..e52341f29 --- /dev/null +++ b/qa/suites/krbd/unmap/conf.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + client: + rbd default features: 1 # pre-single-major is v3.13, so layering only diff --git a/qa/suites/krbd/unmap/kernels/.qa b/qa/suites/krbd/unmap/kernels/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/kernels/pre-single-major.yaml b/qa/suites/krbd/unmap/kernels/pre-single-major.yaml new file mode 100644 index 000000000..a5636b45e --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/pre-single-major.yaml @@ -0,0 +1,10 @@ +overrides: + kernel: + client.0: + branch: nightly_pre-single-major # v3.12.z +tasks: +- exec: + client.0: + - "modprobe -r rbd" + - "modprobe --first-time rbd" + - "test ! -f /sys/module/rbd/parameters/single_major" diff --git a/qa/suites/krbd/unmap/kernels/single-major-off.yaml b/qa/suites/krbd/unmap/kernels/single-major-off.yaml new file mode 100644 index 000000000..9dc2488ef --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/single-major-off.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + client.0: + - "modprobe -r rbd" + - "modprobe --first-time rbd single_major=0" + - "grep -q N /sys/module/rbd/parameters/single_major" diff --git a/qa/suites/krbd/unmap/kernels/single-major-on.yaml b/qa/suites/krbd/unmap/kernels/single-major-on.yaml new file mode 100644 index 000000000..c3889f34a --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/single-major-on.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + client.0: + - "modprobe -r rbd" + - "modprobe --first-time rbd single_major=1" + - "grep -q Y /sys/module/rbd/parameters/single_major" diff --git a/qa/suites/krbd/unmap/tasks/.qa b/qa/suites/krbd/unmap/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/tasks/unmap.yaml b/qa/suites/krbd/unmap/tasks/unmap.yaml new file mode 100644 index 000000000..435061b45 --- /dev/null +++ b/qa/suites/krbd/unmap/tasks/unmap.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/unmap.t diff --git a/qa/suites/krbd/wac/.qa b/qa/suites/krbd/wac/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/% b/qa/suites/krbd/wac/sysfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/% diff --git a/qa/suites/krbd/wac/sysfs/.qa b/qa/suites/krbd/wac/sysfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/bluestore-bitmap.yaml b/qa/suites/krbd/wac/sysfs/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/ceph/.qa b/qa/suites/krbd/wac/sysfs/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/ceph/ceph.yaml b/qa/suites/krbd/wac/sysfs/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/wac/sysfs/clusters/.qa b/qa/suites/krbd/wac/sysfs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/clusters/fixed-1.yaml b/qa/suites/krbd/wac/sysfs/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/conf.yaml b/qa/suites/krbd/wac/sysfs/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/wac/sysfs/tasks/.qa b/qa/suites/krbd/wac/sysfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml b/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml new file mode 100644 index 000000000..cd1ba930f --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_stable_writes.sh diff --git a/qa/suites/krbd/wac/wac/% b/qa/suites/krbd/wac/wac/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/wac/wac/% diff --git a/qa/suites/krbd/wac/wac/.qa b/qa/suites/krbd/wac/wac/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/bluestore-bitmap.yaml b/qa/suites/krbd/wac/wac/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/wac/wac/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/ceph/.qa b/qa/suites/krbd/wac/wac/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/ceph/ceph.yaml b/qa/suites/krbd/wac/wac/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/wac/wac/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/wac/wac/clusters/.qa b/qa/suites/krbd/wac/wac/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/clusters/fixed-3.yaml b/qa/suites/krbd/wac/wac/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/wac/wac/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/conf.yaml b/qa/suites/krbd/wac/wac/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/wac/wac/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/wac/wac/tasks/.qa b/qa/suites/krbd/wac/wac/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/tasks/wac.yaml b/qa/suites/krbd/wac/wac/tasks/wac.yaml new file mode 100644 index 000000000..524d29a43 --- /dev/null +++ b/qa/suites/krbd/wac/wac/tasks/wac.yaml @@ -0,0 +1,8 @@ +tasks: +- exec: + client.0: + - "dmesg -C" +- workunit: + clients: + all: + - rbd/krbd_wac.sh diff --git a/qa/suites/krbd/wac/wac/verify/.qa b/qa/suites/krbd/wac/wac/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/verify/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/verify/many-resets.yaml b/qa/suites/krbd/wac/wac/verify/many-resets.yaml new file mode 100644 index 000000000..1f434fd28 --- /dev/null +++ b/qa/suites/krbd/wac/wac/verify/many-resets.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME +tasks: +- exec: + client.0: + - "dmesg | grep -q 'libceph: osd.* socket closed'" + - "dmesg | grep -q 'libceph: osd.* socket error on write'" diff --git a/qa/suites/krbd/wac/wac/verify/no-resets.yaml b/qa/suites/krbd/wac/wac/verify/no-resets.yaml new file mode 100644 index 000000000..2728479da --- /dev/null +++ b/qa/suites/krbd/wac/wac/verify/no-resets.yaml @@ -0,0 +1,5 @@ +tasks: +- exec: + client.0: + - "! dmesg | grep -q 'libceph: osd.* socket closed'" + - "! dmesg | grep -q 'libceph: osd.* socket error on write'" diff --git a/qa/suites/mixed-clients/.qa b/qa/suites/mixed-clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/.qa @@ -0,0 +1 @@ +../.qa/
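Editor's note: wac.yaml clears the kernel log with dmesg -C before the workload; many-resets.yaml then expects the injected socket failures (ms inject socket failures: 500) to surface as libceph reset messages, while no-resets.yaml asserts their absence when nothing is injected. A hedged sketch of the same check run by hand after a wac job, using only the grep patterns from the fragments above:

    dmesg | grep 'libceph: osd.* socket closed'         || echo "no socket resets seen"
    dmesg | grep 'libceph: osd.* socket error on write' || echo "no write errors seen"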
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/.qa b/qa/suites/mixed-clients/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/clusters/.qa b/qa/suites/mixed-clients/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml b/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml new file mode 100644 index 000000000..134bca1b9 --- /dev/null +++ b/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.1] +- [mon.b, mon.c, osd.2, osd.3, client.0] +- [client.1] diff --git a/qa/suites/mixed-clients/basic/objectstore b/qa/suites/mixed-clients/basic/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/mixed-clients/basic/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/tasks/.qa b/qa/suites/mixed-clients/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml new file mode 100644 index 000000000..bb347be7f --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: + branch: dumpling +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/iozone.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml new file mode 100644 index 000000000..2c32a61e8 --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: + branch: dumpling +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/blogbench.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - kernel_untar_build.sh diff --git a/qa/suites/netsplit/% b/qa/suites/netsplit/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/netsplit/% diff --git a/qa/suites/netsplit/.qa b/qa/suites/netsplit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/netsplit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/netsplit/ceph.yaml b/qa/suites/netsplit/ceph.yaml new file mode 100644 index 000000000..ddf54b3a3 --- /dev/null +++ b/qa/suites/netsplit/ceph.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 3 + mon: + mon min osdmap epochs: 25 + paxos service trim min: 5 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +# thrashing monitors may make mgr have trouble w/ its keepalive + log-whitelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(MON_DOWN\) +# slow mons -> slow peering -> PG_AVAILABILITY + - \(PG_AVAILABILITY\) + - \(SLOW_OPS\) +tasks: +- install: +- ceph: diff --git a/qa/suites/netsplit/cluster.yaml b/qa/suites/netsplit/cluster.yaml new file mode 100644 index 000000000..0681feca2 --- /dev/null +++ b/qa/suites/netsplit/cluster.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7, client.0] +- [mon.c] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/netsplit/msgr.yaml b/qa/suites/netsplit/msgr.yaml new file mode 120000 index 000000000..775a723fd --- /dev/null +++ b/qa/suites/netsplit/msgr.yaml @@ -0,0 +1 @@ +../../msgr/async.yaml
\ No newline at end of file diff --git a/qa/suites/netsplit/rados.yaml b/qa/suites/netsplit/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/netsplit/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/netsplit/supported-random-distro$ b/qa/suites/netsplit/supported-random-distro$ new file mode 120000 index 000000000..8d1b6f6c8 --- /dev/null +++ b/qa/suites/netsplit/supported-random-distro$ @@ -0,0 +1 @@ +../rados/basic/supported-random-distro$/
\ No newline at end of file diff --git a/qa/suites/netsplit/tests/.qa b/qa/suites/netsplit/tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/netsplit/tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/netsplit/tests/mon_pool_ops.yaml b/qa/suites/netsplit/tests/mon_pool_ops.yaml new file mode 100644 index 000000000..5b41c05fa --- /dev/null +++ b/qa/suites/netsplit/tests/mon_pool_ops.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 3 +tasks: +- workunit: + clients: + client.0: + - mon/pool_ops.sh +- netsplit.disconnect: [mon.a, mon.c] +- workunit: + clients: + client.0: + - mon/pool_ops.sh +- netsplit.reconnect: [mon.a, mon.c] +- netsplit.disconnect: [mon.b, mon.c] +- workunit: + clients: + client.0: + - mon/pool_ops.sh
\ No newline at end of file diff --git a/qa/suites/orch/.qa b/qa/suites/orch/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/.qa @@ -0,0 +1 @@ +../.qa/
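Editor's note: mon_pool_ops.yaml above alternates pool operations with single network cuts between monitor pairs; with three monitors and connectivity-based elections (mon election default strategy: 3), quorum is expected to survive each cut. A hedged sketch of commands one could run from client.0 during a cut to confirm that; none of this is part of the suite:

    ceph quorum_status --format json | jq '.quorum_names'   # which monitors are still in quorum
    ceph mon stat                                            # one-line quorum summary
    ceph health detail                                       # MON_DOWN is tolerated (whitelisted in ceph.yaml above)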
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/.qa b/qa/suites/orch/cephadm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mds_upgrade_sequence b/qa/suites/orch/cephadm/mds_upgrade_sequence new file mode 120000 index 000000000..24aa41c10 --- /dev/null +++ b/qa/suites/orch/cephadm/mds_upgrade_sequence @@ -0,0 +1 @@ +.qa/suites/fs/upgrade/mds_upgrade_sequence/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/% b/qa/suites/orch/cephadm/mgr-nfs-upgrade/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/% diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/.qa b/qa/suites/orch/cephadm/mgr-nfs-upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/0-centos_8.stream_container_tools.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/0-centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/0-centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/.qa b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.0.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.0.yaml new file mode 100644 index 000000000..beba37428 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.0.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.0 + cephadm_branch: v16.2.0 + cephadm_git_url: https://github.com/ceph/ceph + # needed for v16.2.0 due to --skip-admin-label + avoid_pacific_features: true diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.4.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.4.yaml new file mode 100644 index 000000000..1cbe5a134 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.4.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.4 + cephadm_branch: v16.2.4 + cephadm_git_url: https://github.com/ceph/ceph + # needed for v16.2.4 due to --skip-admin-label + avoid_pacific_features: true diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.5.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.5.yaml new file mode 100644 index 000000000..381088d5b --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.5.yaml @@ -0,0 +1,6 @@ +tasks: +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.5 + cephadm_branch: v16.2.5 + cephadm_git_url: https://github.com/ceph/ceph diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-start.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-start.yaml new file mode 100644 index 000000000..2d9f09a4e --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-start.yaml @@ -0,0 +1,29 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - host.b + - osd.4 + - osd.5 + - osd.6 + - osd.7 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/2-nfs.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/2-nfs.yaml new file mode 100644 index 000000000..34680fc8a --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/2-nfs.yaml @@ -0,0 +1,29 @@ +tasks: + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + +- cephadm.wait_for_service: + service: mds.foofs + +- cephadm.shell: + host.a: + - ceph nfs cluster create foo --placement=2 || ceph nfs cluster create cephfs foo --placement=2 + - ceph nfs export create cephfs --fsname foofs --clusterid foo --binding /fake || ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + + # we can't do wait_for_service here because with octopus it's nfs.ganesha-foo not nfs.foo + - while ! ceph orch ls | grep nfs | grep 2/2 ; do sleep 1 ; done + +- vip.exec: + host.a: + - mkdir /mnt/foo + - while ! 
mount -t nfs $(hostname):/fake /mnt/foo -o sync ; do sleep 5 ; done + - echo test > /mnt/foo/testfile + - sync + diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/3-upgrade-with-workload.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/3-upgrade-with-workload.yaml new file mode 100644 index 000000000..ec901a92e --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/3-upgrade-with-workload.yaml @@ -0,0 +1,43 @@ +tasks: +- parallel: + - upgrade-tasks + - workload-tasks + +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph mgr module enable nfs --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - cephadm.shell: + env: [sha1] + host.a: + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; ceph health detail ; sleep 30 ; done + - ceph orch ps + - ceph versions + - echo "wait for servicemap items w/ changing names to refresh" + - sleep 60 + - ceph orch ps + - ceph orch upgrade status + - ceph health detail + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + + # this should be a no-op, but confirms nfs.ganesha-foo was remapped to nfs.foo + - cephadm.wait_for_service: + service: nfs.foo + +workload-tasks: + sequential: + - exec: + host.a: + - cd /mnt/foo && dbench 5 -t 600 || true # might fail with ESTALE + # make sure mount works + - umount /mnt/foo + - while ! mount -t nfs $(hostname):/fake /mnt/foo ; do sleep 5 ; done + - cd /mnt/foo && dbench 5 -t 5 diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/4-final.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/4-final.yaml new file mode 100644 index 000000000..3a9169659 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/4-final.yaml @@ -0,0 +1,10 @@ +tasks: +- vip.exec: + host.a: + - umount /mnt/foo +- cephadm.shell: + host.a: + - ceph nfs cluster ls | grep foo + - ceph nfs export ls foo --detailed + - rados -p .nfs --all ls - + - ceph config get mgr mgr/cephadm/migration_current | grep 6 diff --git a/qa/suites/orch/cephadm/nfs b/qa/suites/orch/cephadm/nfs new file mode 120000 index 000000000..628e2a2a2 --- /dev/null +++ b/qa/suites/orch/cephadm/nfs @@ -0,0 +1 @@ +.qa/suites/fs/nfs/
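Editor's note: a hedged, manual spot-check one could run after the upgrade sequence above completes; it mostly restates checks the fragments already perform and is not part of any suite file:

    ceph orch upgrade status                          # should no longer report an upgrade in progress
    ceph versions | jq -e '.overall | length == 1'    # every daemon on the new version
    ceph nfs cluster ls | grep foo                    # the nfs cluster survived the upgrade
    ceph nfs export ls foo --detailed                 # the /fake export is still defined
    mount | grep /mnt/foo                             # the client mount is still in place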
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/% b/qa/suites/orch/cephadm/orchestrator_cli/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/% diff --git a/qa/suites/orch/cephadm/orchestrator_cli/.qa b/qa/suites/orch/cephadm/orchestrator_cli/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/0-random-distro$ b/qa/suites/orch/cephadm/orchestrator_cli/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/2-node-mgr.yaml b/qa/suites/orch/cephadm/orchestrator_cli/2-node-mgr.yaml new file mode 120000 index 000000000..8a0b9123b --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/2-node-mgr.yaml @@ -0,0 +1 @@ +.qa/clusters/2-node-mgr.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/agent b/qa/suites/orch/cephadm/orchestrator_cli/agent new file mode 120000 index 000000000..154924209 --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/agent @@ -0,0 +1 @@ +../smoke/agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/orchestrator_cli.yaml b/qa/suites/orch/cephadm/orchestrator_cli/orchestrator_cli.yaml new file mode 100644 index 000000000..3e6e7f955 --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/orchestrator_cli.yaml @@ -0,0 +1,19 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(DEVICE_IDENT_ON\) + - \(DEVICE_FAULT_ON\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_orchestrator_cli diff --git a/qa/suites/orch/cephadm/osds/% b/qa/suites/orch/cephadm/osds/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/osds/% diff --git a/qa/suites/orch/cephadm/osds/.qa b/qa/suites/orch/cephadm/osds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/0-distro b/qa/suites/orch/cephadm/osds/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/osds/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/0-nvme-loop.yaml b/qa/suites/orch/cephadm/osds/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/osds/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/1-start.yaml b/qa/suites/orch/cephadm/osds/1-start.yaml new file mode 100644 index 000000000..4331d7c66 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/1-start.yaml @@ -0,0 +1,25 @@ +tasks: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls | grep '^osd.all-available-devices ' +roles: +- - host.a + - client.0 +- - host.b + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/osds/2-ops/.qa b/qa/suites/orch/cephadm/osds/2-ops/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/2-ops/repave-all.yaml b/qa/suites/orch/cephadm/osds/2-ops/repave-all.yaml new file mode 100644 index 000000000..16413aba8 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/repave-all.yaml @@ -0,0 +1,13 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + ceph osd tree + for osd in `ceph osd ls` ; do + ceph orch osd rm $osd --force --zap --replace + done + while ceph orch osd rm ls | wc | grep ^1 ; do sleep 10 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rm-zap-add.yaml b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-add.yaml new file mode 100644 index 000000000..09be72f11 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-add.yaml @@ -0,0 +1,17 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + DEVID=$(ceph device ls | grep osd.1 | awk '{print $1}') + HOST=$(ceph orch device ls | grep $DEVID | awk '{print $1}') + DEV=$(ceph orch device ls | grep $DEVID | awk '{print $2}') + echo "host $HOST, dev $DEV, devid $DEVID" + ceph orch osd rm 1 + while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done + ceph orch device zap $HOST $DEV --force + ceph orch daemon add osd $HOST:$DEV + while ! ceph osd dump | grep osd.1 | grep up ; do sleep 5 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rm-zap-flag.yaml b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-flag.yaml new file mode 100644 index 000000000..8f07f6d53 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-flag.yaml @@ -0,0 +1,15 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + DEVID=$(ceph device ls | grep osd.1 | awk '{print $1}') + HOST=$(ceph orch device ls | grep "$DEVID" | awk '{print $1}') + DEV=$(ceph orch device ls | grep "$DEVID" | awk '{print $2}') + echo "host $HOST, dev $DEV, devid $DEVID" + ceph orch osd rm --zap --replace 1 + while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done + while ! ceph osd dump | grep osd.1 | grep "up\s*in" ; do sleep 5 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rm-zap-wait.yaml b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-wait.yaml new file mode 100644 index 000000000..78161aa49 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-wait.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + DEVID=$(ceph device ls | grep osd.1 | awk '{print $1}') + HOST=$(ceph orch device ls | grep $DEVID | awk '{print $1}') + DEV=$(ceph orch device ls | grep $DEVID | awk '{print $2}') + echo "host $HOST, dev $DEV, devid $DEVID" + ceph orch osd rm 1 + while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done + ceph orch device zap $HOST $DEV --force + while ! 
ceph osd dump | grep osd.1 | grep up ; do sleep 5 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rmdir-reactivate.yaml b/qa/suites/orch/cephadm/osds/2-ops/rmdir-reactivate.yaml new file mode 100644 index 000000000..a971a02e4 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rmdir-reactivate.yaml @@ -0,0 +1,20 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + HOST=$(hostname -s) + OSD=$(ceph orch ps $HOST | grep osd | head -n 1 | awk '{print $1}') + echo "host $HOST, osd $OSD" + ceph orch daemon stop $OSD + while ceph orch ps | grep $OSD | grep running ; do sleep 5 ; done + ceph auth export $OSD > k + ceph orch daemon rm $OSD --force + ceph orch ps --refresh + while ceph orch ps | grep $OSD ; do sleep 5 ; done + ceph auth add $OSD -i k + ceph cephadm osd activate $HOST + while ! ceph orch ps | grep $OSD | grep running ; do sleep 5 ; done +- cephadm.healthy: diff --git a/qa/suites/orch/cephadm/rbd_iscsi b/qa/suites/orch/cephadm/rbd_iscsi new file mode 120000 index 000000000..f0073a119 --- /dev/null +++ b/qa/suites/orch/cephadm/rbd_iscsi @@ -0,0 +1 @@ +.qa/suites/rbd/iscsi
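Editor's note: the 2-ops fragments above are variations on the same replace-one-OSD flow. A condensed, hedged sketch of that flow using only commands that appear in the fragments ($HOST and $DEV are resolved the same way the fragments resolve them):

    ceph orch osd rm --zap --replace 1                                # drain, zap and mark osd.1 destroyed
    while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done       # wait for the removal to finish
    # without --zap/--replace, the device is zapped and re-added explicitly:
    #   ceph orch device zap $HOST $DEV --force
    #   ceph orch daemon add osd $HOST:$DEV
    while ! ceph osd dump | grep osd.1 | grep up ; do sleep 5 ; done  # wait for the replacement to come up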
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/% b/qa/suites/orch/cephadm/smoke-roleless/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/% diff --git a/qa/suites/orch/cephadm/smoke-roleless/.qa b/qa/suites/orch/cephadm/smoke-roleless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/0-distro b/qa/suites/orch/cephadm/smoke-roleless/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/0-nvme-loop.yaml b/qa/suites/orch/cephadm/smoke-roleless/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/1-start.yaml b/qa/suites/orch/cephadm/smoke-roleless/1-start.yaml new file mode 100644 index 000000000..018356f8f --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/1-start.yaml @@ -0,0 +1,24 @@ +tasks: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +roles: +- - host.a + - client.0 +- - host.b + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/.qa b/qa/suites/orch/cephadm/smoke-roleless/2-services/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/basic.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/basic.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/basic.yaml diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/client-keyring.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/client-keyring.yaml new file mode 100644 index 000000000..f00800471 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/client-keyring.yaml @@ -0,0 +1,40 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch host label add `hostname` foo + - ceph auth get-or-create client.foo mon 'allow r' + - ceph orch client-keyring set client.foo label:foo --mode 770 --owner 11111:22222 +- exec: + host.a: + - while ! test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done + - ls -al /etc/ceph/ceph.client.foo.keyring | grep rwxrwx--- + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 11111 + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 22222 + - test -e /etc/ceph/ceph.conf +- exec: + host.b: + - test ! -e /etc/ceph/ceph.client.foo.keyring +- cephadm.shell: + host.b: + - ceph orch host label add `hostname` foo +- exec: + host.b: + - while ! test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done + - ls -al /etc/ceph/ceph.client.foo.keyring | grep rwxrwx--- + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 11111 + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 22222 +- cephadm.shell: + host.b: + - ceph orch host label rm `hostname` foo +- exec: + host.b: + - while test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done +- exec: + host.a: + - test -e /etc/ceph/ceph.client.foo.keyring +- cephadm.shell: + host.a: + - ceph orch client-keyring rm client.foo +- exec: + host.a: + - while test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/iscsi.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/iscsi.yaml new file mode 100644 index 000000000..7f57076db --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/iscsi.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm.shell: + host.a: + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p +- cephadm.wait_for_service: + service: iscsi.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/jaeger.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/jaeger.yaml new file mode 100644 index 000000000..ad102fedd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/jaeger.yaml @@ -0,0 +1,12 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch apply jaeger +- cephadm.wait_for_service: + service: elasticsearch +- cephadm.wait_for_service: + service: jaeger-collector +- cephadm.wait_for_service: + service: jaeger-query +- cephadm.wait_for_service: + service: jaeger-agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/mirror.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/mirror.yaml new file mode 100644 index 000000000..681e1e04a --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/mirror.yaml @@ -0,0 +1,9 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch apply rbd-mirror "--placement=*" + - ceph orch apply cephfs-mirror "--placement=*" +- cephadm.wait_for_service: + service: rbd-mirror +- cephadm.wait_for_service: + service: cephfs-mirror diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-haproxy-proto.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-haproxy-proto.yaml new file mode 100644 index 000000000..477e5c443 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-haproxy-proto.yaml @@ -0,0 +1,35 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +# use nfs module to create cluster and export +- cephadm.shell: + host.a: + - ceph fs volume create fs1 + - ceph nfs cluster create happy --ingress --virtual-ip={{VIP0}} --ingress-mode=haproxy-protocol + - ceph nfs export create cephfs --fsname fs1 --cluster-id happy --pseudo-path /d1 + +# wait for services to start +- cephadm.wait_for_service: + service: nfs.happy +- cephadm.wait_for_service: + service: ingress.nfs.happy + +# make sure mount can be reached over VIP, ensuring both that +# keepalived is maintaining the VIP and that the nfs has bound to it +- vip.exec: + host.a: + - mkdir /mnt/happy + - sleep 1 + - mount -t nfs {{VIP0}}:/d1 /mnt/happy + - echo test > /mnt/happy/testfile + - sync diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-bucket.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-bucket.yaml new file mode 100644 index 000000000..3f4964978 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-bucket.yaml @@ -0,0 +1,89 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph orch apply rgw foorgw --port 8800 + - ceph nfs cluster create foo --ingress --virtual-ip {{VIP0}}/{{VIPPREFIXLEN}} + +- vip.exec: + host.a: + - dnf install -y python3-boto3 || apt install -y python3-boto3 + - /home/ubuntu/cephtest/cephadm shell radosgw-admin user create --uid foouser --display-name foo > /tmp/user.json + +- python: + host.a: | + import boto3 + import json + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + bucket.create() + bucket.put_object(Key='myobject', Body='thebody') + +- cephadm.shell: + host.a: + - ceph nfs export create rgw --bucket foobucket --cluster-id foo --pseudo-path /foobucket + +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/foobucket /mnt/foo + - find /mnt/foo -ls + - grep thebody /mnt/foo/myobject + - echo test > /mnt/foo/newobject + - 
sync + +- python: + host.a: | + import boto3 + import json + from io import BytesIO + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + data = BytesIO() + bucket.download_fileobj(Fileobj=data, Key='newobject') + print(data.getvalue()) + assert data.getvalue().decode() == 'test\n' + +- vip.exec: + host.a: + - umount /mnt/foo + +- cephadm.shell: + host.a: + - ceph nfs export rm foo /foobucket + - ceph nfs cluster rm foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-user.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-user.yaml new file mode 100644 index 000000000..721aecfc3 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-user.yaml @@ -0,0 +1,90 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph orch apply rgw foorgw --port 8800 + - ceph nfs cluster create foo --ingress --virtual-ip {{VIP0}}/{{VIPPREFIXLEN}} + +- vip.exec: + host.a: + - dnf install -y python3-boto3 || apt install -y python3-boto3 + - /home/ubuntu/cephtest/cephadm shell radosgw-admin user create --uid foouser --display-name foo > /tmp/user.json + +- python: + host.a: | + import boto3 + import json + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + bucket.create() + bucket.put_object(Key='myobject', Body='thebody') + +- cephadm.shell: + host.a: + - ceph nfs export create rgw --cluster-id foo --pseudo-path /foouser --user-id foouser + +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/foouser /mnt/foo + - test -d /mnt/foo/foobucket + - find /mnt/foo -ls + - grep thebody /mnt/foo/foobucket/myobject + - echo test > /mnt/foo/foobucket/newobject + - sync + +- python: + host.a: | + import boto3 + import json + from io import BytesIO + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + data = BytesIO() + bucket.download_fileobj(Fileobj=data, Key='newobject') + print(data.getvalue()) + assert data.getvalue().decode() == 'test\n' + +- vip.exec: + host.a: + - umount /mnt/foo + +- cephadm.shell: + host.a: + - ceph nfs export rm foo /foouser + - ceph nfs cluster rm foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress.yaml new file mode 100644 index 000000000..b4e843df2 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress.yaml @@ -0,0 +1,68 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# 
stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + +# deploy nfs + ingress +- cephadm.apply: + specs: + - service_type: nfs + service_id: foo + placement: + count: 2 + spec: + port: 12049 + - service_type: ingress + service_id: nfs.foo + spec: + backend_service: nfs.foo + frontend_port: 2049 + monitor_port: 9002 + virtual_ip: "{{VIP0}}/{{VIPPREFIXLEN}}" +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- cephadm.shell: + host.a: + - ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/fake /mnt/foo + - echo test > /mnt/foo/testfile + - sync + +# take each gateway down in turn and ensure things still work +- cephadm.shell: + volumes: + - /mnt/foo:/mnt/foo + host.a: + - | + echo "Check with each haproxy down in turn..." + for haproxy in `ceph orch ps | grep ^haproxy.nfs.foo. | awk '{print $1}'`; do + ceph orch daemon stop $haproxy + while ! ceph orch ps | grep $haproxy | grep stopped; do sleep 1 ; done + cat /mnt/foo/testfile + echo $haproxy > /mnt/foo/testfile + sync + ceph orch daemon start $haproxy + while ! ceph orch ps | grep $haproxy | grep running; do sleep 1 ; done + done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress2.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress2.yaml new file mode 100644 index 000000000..a47dd9d76 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress2.yaml @@ -0,0 +1,70 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + - ceph nfs cluster create foo --ingress --virtual-ip {{VIP0}}/{{VIPPREFIXLEN}} --port 2999 + - ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/fake /mnt/foo -o port=2999 + - echo test > /mnt/foo/testfile + - sync + +# take each gateway down in turn and ensure things still work +- cephadm.shell: + volumes: + - /mnt/foo:/mnt/foo + host.a: + - | + echo "Check with each haproxy down in turn..." + for haproxy in `ceph orch ps | grep ^haproxy.nfs.foo. | awk '{print $1}'`; do + ceph orch daemon stop $haproxy + while ! ceph orch ps | grep $haproxy | grep stopped; do sleep 1 ; done + cat /mnt/foo/testfile + echo $haproxy > /mnt/foo/testfile + sync + ceph orch daemon start $haproxy + while ! ceph orch ps | grep $haproxy | grep running; do sleep 1 ; done + done + +# take each ganesha down in turn. +# simulate "failure" by deleting the container +- vip.exec: + all-hosts: + - | + echo "Check with $(hostname) ganesha(s) down..." + for c in `systemctl | grep ceph- | grep @nfs | awk '{print $1}'`; do + cid=`echo $c | sed 's/@/-/'` + id=`echo $c | cut -d @ -f 2 | sed 's/.service$//'` + fsid=`echo $c | cut -d @ -f 1 | cut -d - -f 2-` + echo "Removing daemon $id fsid $fsid..." + sudo $TESTDIR/cephadm rm-daemon --fsid $fsid --name $id + + echo "Waking up cephadm..." + sudo $TESTDIR/cephadm shell -- ceph orch ps --refresh + + while ! 
timeout 1 cat /mnt/foo/testfile ; do true ; done + echo "Mount is back!" + done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-keepalive-only.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-keepalive-only.yaml new file mode 100644 index 000000000..ba5afed47 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-keepalive-only.yaml @@ -0,0 +1,55 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + +# deploy nfs + keepalive-only ingress service +- cephadm.apply: + specs: + - service_type: nfs + service_id: foo + placement: + count: 1 + spec: + port: 2049 + virtual_ip: "{{VIP0}}" + - service_type: ingress + service_id: nfs.foo + placement: + count: 1 + spec: + backend_service: nfs.foo + monitor_port: 9002 + virtual_ip: "{{VIP0}}/{{VIPPREFIXLEN}}" + keepalive_only: true +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +# export and mount +- cephadm.shell: + host.a: + - ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + +# make sure mount can be reached over VIP, ensuring both that +# keepalived is maintaining the VIP and that the nfs has bound to it +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/fake /mnt/foo + - echo test > /mnt/foo/testfile + - sync diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs.yaml new file mode 100644 index 000000000..194f4e9de --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs.yaml @@ -0,0 +1,13 @@ +tasks: + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.apply: + specs: + - service_type: nfs + service_id: foo +- cephadm.wait_for_service: + service: nfs.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs2.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs2.yaml new file mode 100644 index 000000000..959c5aa77 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs2.yaml @@ -0,0 +1,12 @@ +tasks: + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph nfs cluster create foo +- cephadm.wait_for_service: + service: nfs.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nvmeof.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nvmeof.yaml new file mode 100644 index 000000000..4c5e26740 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nvmeof.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm.shell: + host.a: + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply nvmeof foo +- cephadm.wait_for_service: + service: nvmeof.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw-ingress.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw-ingress.yaml new file mode 100644 index 000000000..710edab73 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw-ingress.yaml @@ -0,0 +1,60 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# deploy rgw + ingress +- cephadm.apply: + specs: + - service_type: rgw + service_id: foo + placement: + count: 4 + 
host_pattern: "*" + spec: + rgw_frontend_port: 8000 + - service_type: ingress + service_id: rgw.foo + placement: + count: 2 + spec: + backend_service: rgw.foo + frontend_port: 9000 + monitor_port: 9001 + virtual_ip: "{{VIP0}}/{{VIPPREFIXLEN}}" +- cephadm.wait_for_service: + service: rgw.foo +- cephadm.wait_for_service: + service: ingress.rgw.foo + +# take each component down in turn and ensure things still work +- cephadm.shell: + host.a: + - | + echo "Check while healthy..." + curl http://{{VIP0}}:9000/ + + # stop each rgw in turn + echo "Check with each rgw stopped in turn..." + for rgw in `ceph orch ps | grep ^rgw.foo. | awk '{print $1}'`; do + ceph orch daemon stop $rgw + while ! ceph orch ps | grep $rgw | grep stopped; do sleep 1 ; done + while ! curl http://{{VIP0}}:9000/ ; do sleep 1 ; done + ceph orch daemon start $rgw + while ! ceph orch ps | grep $rgw | grep running; do sleep 1 ; done + done + + # stop each haproxy in turn + echo "Check with each haproxy down in turn..." + for haproxy in `ceph orch ps | grep ^haproxy.rgw.foo. | awk '{print $1}'`; do + ceph orch daemon stop $haproxy + while ! ceph orch ps | grep $haproxy | grep stopped; do sleep 1 ; done + while ! curl http://{{VIP0}}:9000/ ; do sleep 1 ; done + ceph orch daemon start $haproxy + while ! ceph orch ps | grep $haproxy | grep running; do sleep 1 ; done + done + + while ! curl http://{{VIP0}}:9000/ ; do sleep 1 ; done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw.yaml new file mode 100644 index 000000000..cb2c6f4b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw.yaml @@ -0,0 +1,12 @@ +tasks: +- cephadm.apply: + specs: + - service_type: rgw + service_id: foo + placement: + count_per_host: 4 + host_pattern: "*" + spec: + rgw_frontend_port: 8000 +- cephadm.wait_for_service: + service: rgw.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/3-final.yaml b/qa/suites/orch/cephadm/smoke-roleless/3-final.yaml new file mode 100644 index 000000000..bb938848c --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/3-final.yaml @@ -0,0 +1,10 @@ +tasks: +- cephadm.shell: + host.a: + - stat -c '%u %g' /var/log/ceph | grep '167 167' + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls | grep '^osd.all-available-devices ' diff --git a/qa/suites/orch/cephadm/smoke-singlehost/% b/qa/suites/orch/cephadm/smoke-singlehost/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/% diff --git a/qa/suites/orch/cephadm/smoke-singlehost/.qa b/qa/suites/orch/cephadm/smoke-singlehost/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-singlehost/0-random-distro$ b/qa/suites/orch/cephadm/smoke-singlehost/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-singlehost/1-start.yaml b/qa/suites/orch/cephadm/smoke-singlehost/1-start.yaml new file mode 100644 index 000000000..ca6019c66 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/1-start.yaml @@ -0,0 +1,27 @@ +tasks: +- cephadm: + roleless: true + single_host_defaults: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke-singlehost/2-services/.qa b/qa/suites/orch/cephadm/smoke-singlehost/2-services/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/2-services/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-singlehost/2-services/basic.yaml b/qa/suites/orch/cephadm/smoke-singlehost/2-services/basic.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/2-services/basic.yaml diff --git a/qa/suites/orch/cephadm/smoke-singlehost/2-services/rgw.yaml b/qa/suites/orch/cephadm/smoke-singlehost/2-services/rgw.yaml new file mode 100644 index 000000000..cb2c6f4b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/2-services/rgw.yaml @@ -0,0 +1,12 @@ +tasks: +- cephadm.apply: + specs: + - service_type: rgw + service_id: foo + placement: + count_per_host: 4 + host_pattern: "*" + spec: + rgw_frontend_port: 8000 +- cephadm.wait_for_service: + service: rgw.foo diff --git a/qa/suites/orch/cephadm/smoke-singlehost/3-final.yaml b/qa/suites/orch/cephadm/smoke-singlehost/3-final.yaml new file mode 100644 index 000000000..02f5b289c --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/3-final.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/orch/cephadm/smoke-small/% b/qa/suites/orch/cephadm/smoke-small/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/% diff --git a/qa/suites/orch/cephadm/smoke-small/.qa b/qa/suites/orch/cephadm/smoke-small/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/0-distro/centos_8.stream_container_tools_crun.yaml b/qa/suites/orch/cephadm/smoke-small/0-distro/centos_8.stream_container_tools_crun.yaml new file mode 120000 index 000000000..83fe02026 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/0-distro/centos_8.stream_container_tools_crun.yaml @@ -0,0 +1 @@ +../.qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/0-nvme-loop.yaml b/qa/suites/orch/cephadm/smoke-small/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/agent/.qa b/qa/suites/orch/cephadm/smoke-small/agent/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/agent/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/agent/off.yaml b/qa/suites/orch/cephadm/smoke-small/agent/off.yaml new file mode 100644 index 000000000..f37c651bc --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/agent/off.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: false diff --git a/qa/suites/orch/cephadm/smoke-small/agent/on.yaml b/qa/suites/orch/cephadm/smoke-small/agent/on.yaml new file mode 100644 index 000000000..90ac298b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/agent/on.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: true diff --git a/qa/suites/orch/cephadm/smoke-small/fixed-2.yaml b/qa/suites/orch/cephadm/smoke-small/fixed-2.yaml new file mode 100644 index 000000000..61090a165 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/fixed-2.yaml @@ -0,0 +1,29 @@ +roles: +- - mon.a + - mgr.y + - osd.0 + - client.0 + - ceph.rgw.foo.a + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.1 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +- - mon.c + - mgr.z + - osd.2 + - client.2 + - node-exporter.c +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke-small/mon_election b/qa/suites/orch/cephadm/smoke-small/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/start.yaml b/qa/suites/orch/cephadm/smoke-small/start.yaml new file mode 100644 index 000000000..77f493ca1 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/start.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 +- cephadm.shell: + mon.a: + - stat -c '%u %g' /var/log/ceph | grep '167 167' + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls --format yaml + - ceph orch ls | grep '^osd ' diff --git a/qa/suites/orch/cephadm/smoke/% b/qa/suites/orch/cephadm/smoke/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/% diff --git a/qa/suites/orch/cephadm/smoke/.qa b/qa/suites/orch/cephadm/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/0-distro b/qa/suites/orch/cephadm/smoke/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/0-nvme-loop.yaml b/qa/suites/orch/cephadm/smoke/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/agent/.qa b/qa/suites/orch/cephadm/smoke/agent/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/agent/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/agent/off.yaml b/qa/suites/orch/cephadm/smoke/agent/off.yaml new file mode 100644 index 000000000..f37c651bc --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/agent/off.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: false diff --git a/qa/suites/orch/cephadm/smoke/agent/on.yaml b/qa/suites/orch/cephadm/smoke/agent/on.yaml new file mode 100644 index 000000000..90ac298b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/agent/on.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: true diff --git a/qa/suites/orch/cephadm/smoke/fixed-2.yaml b/qa/suites/orch/cephadm/smoke/fixed-2.yaml new file mode 100644 index 000000000..e93564aa9 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/fixed-2.yaml @@ -0,0 +1,32 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - ceph.rgw.foo.a + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b + - ceph.iscsi.iscsi.a +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke/mon_election b/qa/suites/orch/cephadm/smoke/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/start.yaml b/qa/suites/orch/cephadm/smoke/start.yaml new file mode 100644 index 000000000..77f493ca1 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/start.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 +- cephadm.shell: + mon.a: + - stat -c '%u %g' /var/log/ceph | grep '167 167' + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls --format yaml + - ceph orch ls | grep '^osd ' diff --git a/qa/suites/orch/cephadm/thrash/% b/qa/suites/orch/cephadm/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/% diff --git a/qa/suites/orch/cephadm/thrash/.qa b/qa/suites/orch/cephadm/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/.qa @@ -0,0 +1 @@ +../.qa/
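Both copies of start.yaml (smoke-small and smoke) check the ownership of /var/log/ceph with stat -c '%u %g' piped to grep '167 167'. 167 is the uid/gid used for the ceph user and group in the Ceph packages and container images, so the check asserts that the log directory cephadm created on the host is writable by the containerized daemons. An expanded sketch of the same check, for illustration only:

    # /var/log/ceph must be owned by uid/gid 167, the 'ceph' user and group
    # used inside the Ceph container images; otherwise the containerized
    # daemons cannot write their logs there.
    owner=$(stat -c '%u %g' /var/log/ceph)
    if [ "$owner" != "167 167" ]; then
        echo "unexpected ownership of /var/log/ceph: $owner (wanted 167 167)" >&2
        exit 1
    fi
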
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/0-distro b/qa/suites/orch/cephadm/thrash/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/1-start.yaml b/qa/suites/orch/cephadm/thrash/1-start.yaml new file mode 100644 index 000000000..a1b89e44d --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/1-start.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 diff --git a/qa/suites/orch/cephadm/thrash/2-thrash.yaml b/qa/suites/orch/cephadm/thrash/2-thrash.yaml new file mode 100644 index 000000000..05e0f8e76 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/2-thrash.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/.qa b/qa/suites/orch/cephadm/thrash/3-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/rados_api_tests.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/rados_api_tests.yaml new file mode 120000 index 000000000..34e657e04 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/rados_api_tests.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/rados_api_tests.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/radosbench.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/radosbench.yaml new file mode 120000 index 000000000..dad17e0de --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/radosbench.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/radosbench.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/small-objects.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/small-objects.yaml new file mode 120000 index 000000000..6aa66aa37 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/small-objects.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/small-objects.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/snaps-few-objects.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/snaps-few-objects.yaml new file mode 120000 index 000000000..c9cc4cd3e --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/snaps-few-objects.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/snaps-few-objects.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/fixed-2.yaml b/qa/suites/orch/cephadm/thrash/fixed-2.yaml new file mode 120000 index 000000000..5c3e0593c --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/fixed-2.yaml @@ -0,0 +1 @@ +../smoke/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/msgr b/qa/suites/orch/cephadm/thrash/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/root.yaml b/qa/suites/orch/cephadm/thrash/root.yaml new file mode 100644 index 000000000..bedb31d5d --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/root.yaml @@ -0,0 +1,3 @@ +overrides: + cephadm: + cephadm_mode: root diff --git a/qa/suites/orch/cephadm/upgrade/% b/qa/suites/orch/cephadm/upgrade/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/% diff --git a/qa/suites/orch/cephadm/upgrade/.qa b/qa/suites/orch/cephadm/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/1-start-distro/.qa b/qa/suites/orch/cephadm/upgrade/1-start-distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/1-start-distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-centos_8.stream_container-tools.yaml b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-centos_8.stream_container-tools.yaml new file mode 100644 index 000000000..bb9a220e6 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-centos_8.stream_container-tools.yaml @@ -0,0 +1,39 @@ +os_type: centos +os_version: "8.stream" + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf +- cephadm: + image: quay.io/ceph/ceph:v16.2.0 + cephadm_branch: v16.2.0 + cephadm_git_url: https://github.com/ceph/ceph + # avoid --cap-add=PTRACE + --privileged for older cephadm versions + allow_ptrace: false + avoid_pacific_features: true + +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b diff --git a/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml new file mode 100644 index 000000000..d3d9de83e --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml @@ -0,0 +1,33 @@ +os_type: ubuntu +os_version: "20.04" + +tasks: +- cephadm: + image: quay.io/ceph/ceph:v16.2.0 + cephadm_branch: v16.2.0 + cephadm_git_url: https://github.com/ceph/ceph + # avoid --cap-add=PTRACE + --privileged for older cephadm versions + allow_ptrace: false + avoid_pacific_features: true + +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b diff --git a/qa/suites/orch/cephadm/upgrade/2-repo_digest/.qa b/qa/suites/orch/cephadm/upgrade/2-repo_digest/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/2-repo_digest/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/2-repo_digest/defaut.yaml b/qa/suites/orch/cephadm/upgrade/2-repo_digest/defaut.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/2-repo_digest/defaut.yaml diff --git a/qa/suites/orch/cephadm/upgrade/2-repo_digest/repo_digest.yaml b/qa/suites/orch/cephadm/upgrade/2-repo_digest/repo_digest.yaml new file mode 100644 index 000000000..2e6bbfd92 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/2-repo_digest/repo_digest.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest false --force diff --git a/qa/suites/orch/cephadm/upgrade/3-upgrade/.qa b/qa/suites/orch/cephadm/upgrade/3-upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/3-upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml b/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml new file mode 100644 index 000000000..f10a49bea --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml @@ -0,0 +1,21 @@ +tasks: +- cephadm.shell: + env: [sha1] + mon.a: + # setup rgw + - radosgw-admin realm create --rgw-realm=r --default + - radosgw-admin zonegroup create --rgw-zonegroup=default --master --default + - radosgw-admin zone create --rgw-zonegroup=default --rgw-zone=z --master --default + - radosgw-admin period update --rgw-realm=r --commit + - ceph orch apply rgw foo --realm r --zone z --placement=2 --port=8000 + # simple rgw spec (will have no "spec" field) to make sure that works with rgw spec migration + - ceph orch apply rgw smpl + # setup iscsi + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p + - sleep 120 + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 diff --git a/qa/suites/orch/cephadm/upgrade/3-upgrade/staggered.yaml b/qa/suites/orch/cephadm/upgrade/3-upgrade/staggered.yaml new file mode 100644 index 000000000..280714e4e --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/3-upgrade/staggered.yaml @@ -0,0 +1,132 @@ +tasks: +- cephadm.shell: + env: [sha1] + mon.a: + # setup rgw + - radosgw-admin realm create --rgw-realm=r --default + - radosgw-admin zonegroup create --rgw-zonegroup=default --master --default + - radosgw-admin zone create --rgw-zonegroup=default --rgw-zone=z --master --default + - radosgw-admin period update --rgw-realm=r --commit + - ceph orch apply rgw foo --realm r --zone z --placement=2 --port=8000 + # setup iscsi + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p + - sleep 180 + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + # get some good info on the state of things pre-upgrade. 
Useful for debugging + - ceph orch ps + - ceph versions + - ceph -s + - ceph orch ls + # doing staggered upgrade requires mgr daemons being on a version that contains the staggered upgrade code + # until there is a stable version that contains it, we can test by manually upgrading a mgr daemon + - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps --refresh + - sleep 180 + # gather more possible debugging info + - ceph orch ps + - ceph versions + - ceph -s + - ceph health detail + # check that there are two different versions found for mgr daemon (which implies we upgraded one) + - ceph versions | jq -e '.mgr | length == 2' + - ceph mgr fail + - sleep 180 + # now try upgrading the other mgr + - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps --refresh + - sleep 180 + # gather more possible debugging info + - ceph orch ps + - ceph versions + - ceph health detail + - ceph -s + - ceph mgr fail + - sleep 180 + # gather more debugging info + - ceph orch ps + - ceph versions + - ceph -s + - ceph health detail + # now that both mgrs should have been redeployed with the new version, we should be back on only 1 version for the mgrs + - ceph versions | jq -e '.mgr | length == 1' + - ceph mgr fail + - sleep 180 + # debugging info + - ceph orch ps + - ceph orch ls + - ceph versions + # to make sure mgr daemons upgrade is fully completed, including being deployed by a mgr on a new version + # also serves as an early failure if manually upgrading the mgrs failed as --daemon-types won't be recognized + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mgr + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + # verify only one version found for mgrs and that their version hash matches what we are upgrading to + - ceph versions | jq -e '.mgr | length == 1' + - ceph versions | jq -e '.mgr | keys' | grep $sha1 + # verify overall we still see two versions, basically to make sure --daemon-types wasn't ignored and all daemons upgraded + - ceph versions | jq -e '.overall | length == 2' + # check that exactly two daemons have been upgraded to the new image (our 2 mgr daemons) + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 2' + - ceph orch upgrade status + - ceph health detail + # upgrade only the mons on one of the two hosts + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mon --hosts $(ceph orch ps | grep mgr.x | awk '{print $2}') + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify two different version seen for mons + - ceph versions | jq -e '.mon | length == 2' + - ceph orch upgrade status + - ceph health detail + # upgrade mons on the other hosts + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mon --hosts $(ceph orch ps | grep mgr.y | awk '{print $2}') + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify all mons now on same version and version hash matches what we are upgrading to + - ceph versions | jq -e '.mon | length == 1' + - ceph versions | jq -e '.mon | keys' | grep $sha1 + # verify exactly 5 daemons are now upgraded (2 mgrs, 3 mons) + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 5' + - ceph orch upgrade status + - ceph health detail + # upgrade exactly 2 osd daemons + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types osd --limit 2 + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify two different versions now seen for osds + - ceph versions | jq -e '.osd | length == 2' + # verify exactly 7 daemons have been upgraded (2 mgrs, 3 mons, 2 osds) + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 7' + - ceph orch upgrade status + - ceph health detail + # upgrade one more osd + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types crash,osd --limit 1 + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + - ceph versions | jq -e '.osd | length == 2' + # verify now 8 daemons have been upgraded + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 8' + # upgrade the rest of the osds + - ceph orch upgrade status + - ceph health detail + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types crash,osd + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify all osds are now on same version and version hash matches what we are upgrading to + - ceph versions | jq -e '.osd | length == 1' + - ceph versions | jq -e '.osd | keys' | grep $sha1 + - ceph orch upgrade status + - ceph health detail + # upgrade the rgw daemons using --services + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --services rgw.foo + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify all rgw daemons on same version and version hash matches what we are upgrading to + - ceph versions | jq -e '.rgw | length == 1' + - ceph versions | jq -e '.rgw | keys' | grep $sha1 + - ceph orch upgrade status + - ceph health detail + # run upgrade one more time with no filter parameters to make sure anything left gets upgraded + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 diff --git a/qa/suites/orch/cephadm/upgrade/4-wait.yaml b/qa/suites/orch/cephadm/upgrade/4-wait.yaml new file mode 100644 index 000000000..4010c58ed --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/4-wait.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm.shell: + env: [sha1] + mon.a: + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; ceph health detail ; sleep 30 ; done + - ceph orch ps + - ceph versions + - echo "wait for servicemap items w/ changing names to refresh" + - sleep 60 + - ceph orch ps + - ceph versions + - ceph orch upgrade status + - ceph health detail + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - ceph orch ls | grep '^osd ' diff --git a/qa/suites/orch/cephadm/upgrade/5-upgrade-ls.yaml b/qa/suites/orch/cephadm/upgrade/5-upgrade-ls.yaml new file mode 100644 index 000000000..799458bc5 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/5-upgrade-ls.yaml @@ -0,0 +1,6 @@ +tasks: +- cephadm.shell: + mon.a: + - ceph orch upgrade ls + - ceph orch upgrade ls --image quay.io/ceph/ceph --show-all-versions | grep 16.2.0 + - ceph orch upgrade ls --image quay.io/ceph/ceph --tags | grep v16.2.2 diff --git a/qa/suites/orch/cephadm/upgrade/agent b/qa/suites/orch/cephadm/upgrade/agent new file mode 120000 index 000000000..154924209 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/agent @@ -0,0 +1 @@ +../smoke/agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/mon_election b/qa/suites/orch/cephadm/upgrade/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/mon_election @@ -0,0 +1 @@ +.qa/mon_election
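3-upgrade/staggered.yaml and 4-wait.yaml above repeat the same one-line polling idiom many times. Written out with comments it looks like the sketch below; $sha1 is the environment variable the cephadm.shell task passes in via 'env: [sha1]', and the commands are the ones used verbatim in the fragments (only the layout and the -q flags are added here):

    # Poll 'ceph orch upgrade status' until the upgrade either finishes
    # (in_progress becomes false) or reports an error in its message field,
    # dumping progress information every 30 seconds along the way.
    while ceph orch upgrade status | jq '.in_progress' | grep -q true &&
          ! ceph orch upgrade status | jq '.message' | grep -q Error; do
        ceph orch ps
        ceph versions
        ceph orch upgrade status
        sleep 30
    done

    # The staggered test then asserts convergence with jq, for example that
    # all mgr daemons run a single version and that it matches the target:
    ceph versions | jq -e '.mgr | length == 1'
    ceph versions | jq -e '.mgr | keys' | grep $sha1
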
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/% b/qa/suites/orch/cephadm/with-work/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/% diff --git a/qa/suites/orch/cephadm/with-work/.qa b/qa/suites/orch/cephadm/with-work/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/0-distro b/qa/suites/orch/cephadm/with-work/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/fixed-2.yaml b/qa/suites/orch/cephadm/with-work/fixed-2.yaml new file mode 120000 index 000000000..5c3e0593c --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/fixed-2.yaml @@ -0,0 +1 @@ +../smoke/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/mode/.qa b/qa/suites/orch/cephadm/with-work/mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/mode/packaged.yaml b/qa/suites/orch/cephadm/with-work/mode/packaged.yaml new file mode 100644 index 000000000..ba8d43218 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mode/packaged.yaml @@ -0,0 +1,5 @@ +overrides: + cephadm: + cephadm_mode: cephadm-package + install: + extra_packages: [cephadm] diff --git a/qa/suites/orch/cephadm/with-work/mode/root.yaml b/qa/suites/orch/cephadm/with-work/mode/root.yaml new file mode 100644 index 000000000..bedb31d5d --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mode/root.yaml @@ -0,0 +1,3 @@ +overrides: + cephadm: + cephadm_mode: root diff --git a/qa/suites/orch/cephadm/with-work/mon_election b/qa/suites/orch/cephadm/with-work/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/msgr b/qa/suites/orch/cephadm/with-work/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/start.yaml b/qa/suites/orch/cephadm/with-work/start.yaml new file mode 100644 index 000000000..a1b89e44d --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/start.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 diff --git a/qa/suites/orch/cephadm/with-work/tasks/.qa b/qa/suites/orch/cephadm/with-work/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/tasks/rados_api_tests.yaml b/qa/suites/orch/cephadm/with-work/tasks/rados_api_tests.yaml new file mode 120000 index 000000000..2ce80f969 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/rados_api_tests.yaml @@ -0,0 +1 @@ +.qa/suites/rados/basic/tasks/rados_api_tests.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/tasks/rados_python.yaml b/qa/suites/orch/cephadm/with-work/tasks/rados_python.yaml new file mode 120000 index 000000000..210ad8f18 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/rados_python.yaml @@ -0,0 +1 @@ +.qa/suites/rados/basic/tasks/rados_python.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/tasks/rotate-keys.yaml b/qa/suites/orch/cephadm/with-work/tasks/rotate-keys.yaml new file mode 100644 index 000000000..5b91c6ed3 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/rotate-keys.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm.shell: + mon.a: + - | + set -ex + for f in osd.0 osd.1 osd.2 osd.3 osd.4 osd.5 osd.6 osd.7 mgr.y mgr.x + do + echo "rotating key for $f" + K=$(ceph auth get-key $f) + NK="$K" + ceph orch daemon rotate-key $f + while [ "$K" == "$NK" ]; do + sleep 5 + NK=$(ceph auth get-key $f) + done + done diff --git a/qa/suites/orch/cephadm/workunits/% b/qa/suites/orch/cephadm/workunits/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/% diff --git a/qa/suites/orch/cephadm/workunits/.qa b/qa/suites/orch/cephadm/workunits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/.qa @@ -0,0 +1 @@ +../.qa/
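rotate-keys.yaml above relies on 'ceph orch daemon rotate-key' being asynchronous: the command only schedules the new key, so the test keeps re-reading 'ceph auth get-key' until the value actually changes. The same wait, factored into a helper for a single daemon (a sketch, not part of the suite; the function name is made up here):

    # Rotate one daemon's cephx key and block until 'ceph auth get-key'
    # returns something different from the key we started with.
    rotate_and_wait() {
        local daemon=$1
        local old_key new_key
        old_key=$(ceph auth get-key "$daemon")
        ceph orch daemon rotate-key "$daemon"
        new_key=$old_key
        while [ "$new_key" = "$old_key" ]; do
            sleep 5
            new_key=$(ceph auth get-key "$daemon")
        done
    }

    rotate_and_wait osd.0
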
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/0-distro b/qa/suites/orch/cephadm/workunits/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/agent b/qa/suites/orch/cephadm/workunits/agent new file mode 120000 index 000000000..154924209 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/agent @@ -0,0 +1 @@ +../smoke/agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/mon_election b/qa/suites/orch/cephadm/workunits/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/.qa b/qa/suites/orch/cephadm/workunits/task/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml b/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml new file mode 100644 index 000000000..e04fc1eea --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.x, osd.0, client.0] +tasks: +- install: +- exec: + mon.a: + - yum install -y python3 || apt install -y python3 +- workunit: + clients: + client.0: + - cephadm/test_adoption.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_ca_signed_key.yaml b/qa/suites/orch/cephadm/workunits/task/test_ca_signed_key.yaml new file mode 100644 index 000000000..7bf51f719 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_ca_signed_key.yaml @@ -0,0 +1,31 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 + - client.0 +- - host.b + - mon.b + - mgr.b + - osd.1 + - client.1 +overrides: + cephadm: + use-ca-signed-key: True +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - | + set -ex + HOSTNAMES=$(ceph orch host ls --format json | jq -r '.[] | .hostname') + for host in $HOSTNAMES; do + # do a check-host on each host to make sure it's reachable + ceph cephadm check-host ${host} 2> ${host}-ok.txt + HOST_OK=$(cat ${host}-ok.txt) + if ! grep -q "Host looks OK" <<< "$HOST_OK"; then + printf "Failed host check:\n\n$HOST_OK" + exit 1 + fi + done diff --git a/qa/suites/orch/cephadm/workunits/task/test_cephadm.yaml b/qa/suites/orch/cephadm/workunits/task/test_cephadm.yaml new file mode 100644 index 000000000..4d253517c --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_cephadm.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.x, osd.0, client.0] +tasks: +- install: +- exec: + mon.a: + - yum install -y python3 || apt install -y python3 +- workunit: + clients: + client.0: + - cephadm/test_cephadm.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_cephadm_repos.yaml b/qa/suites/orch/cephadm/workunits/task/test_cephadm_repos.yaml new file mode 100644 index 000000000..4a1ac88de --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_cephadm_repos.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, osd.0, client.0] +tasks: +- workunit: + no_coverage_and_limits: true + clients: + client.0: + - cephadm/test_repos.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/+ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/+ diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/.qa b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/centos_8.stream_container_tools.yaml b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/test_iscsi_container.yaml b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/test_iscsi_container.yaml new file mode 100644 index 000000000..19d302c87 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/test_iscsi_container.yaml @@ -0,0 +1,21 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p +- workunit: + clients: + client.0: + - cephadm/test_iscsi_pids_limit.sh + - cephadm/test_iscsi_etc_hosts.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml b/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml new file mode 100644 index 000000000..ec65fb116 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml @@ -0,0 +1,17 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch apply mds a +- cephfs_test_runner: + modules: + - tasks.cephadm_cases.test_cli diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml b/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml new file mode 100644 index 000000000..2a33dc839 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml @@ -0,0 +1,45 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +- - host.b + - osd.3 + - osd.4 + - osd.5 + - mon.b + - mgr.b + - client.1 +- - host.c + - osd.6 + - osd.7 + - osd.8 + - mon.c + - mgr.c + - client.2 +- - host.d + - osd.9 + - osd.10 + - osd.11 + - mon.d + - mgr.d + - client.3 +- - host.e + - osd.12 + - osd.13 + - osd.14 + - mon.e + - mgr.e + - client.4 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch apply mds a +- cephfs_test_runner: + modules: + - tasks.cephadm_cases.test_cli_mon diff --git a/qa/suites/orch/cephadm/workunits/task/test_rgw_multisite.yaml b/qa/suites/orch/cephadm/workunits/task/test_rgw_multisite.yaml new file mode 100644 index 000000000..976e3730c --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_rgw_multisite.yaml @@ -0,0 +1,40 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 +- - host.b + - mon.b + - mgr.b + - osd.1 +- - host.c + - mon.c + - osd.2 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph mgr module enable rgw +- rgw_module.apply: + specs: + - rgw_realm: myrealm1 + rgw_zonegroup: myzonegroup1 + rgw_zone: myzone1 + spec: + rgw_frontend_port: 5500 +- cephadm.shell: + host.a: + - | + set -e + set -x + while true; do TOKEN=$(ceph rgw realm tokens | jq -r '.[0].token'); echo $TOKEN; if [ "$TOKEN" != "master zone has no endpoint" ]; then break; fi; sleep 5; done + TOKENS=$(ceph rgw realm tokens) + echo $TOKENS | jq --exit-status '.[0].realm == "myrealm1"' + echo $TOKENS | jq --exit-status '.[0].token' + TOKEN_JSON=$(ceph rgw realm tokens | jq -r '.[0].token' | base64 --decode) + echo $TOKEN_JSON | jq --exit-status '.realm_name == "myrealm1"' + echo $TOKEN_JSON | jq --exit-status '.endpoint | test("http://.+:\\d+")' + echo $TOKEN_JSON | jq --exit-status '.realm_id | test("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$")' + echo $TOKEN_JSON | jq --exit-status '.access_key' + echo $TOKEN_JSON | jq --exit-status '.secret' diff --git 
a/qa/suites/orch/cephadm/workunits/task/test_set_mon_crush_locations.yaml b/qa/suites/orch/cephadm/workunits/task/test_set_mon_crush_locations.yaml new file mode 100644 index 000000000..6d9bd1525 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_set_mon_crush_locations.yaml @@ -0,0 +1,62 @@ +roles: +- - host.a + - osd.0 + - mon.a + - mgr.a +- - host.b + - osd.1 + - mon.b + - mgr.b +- - host.c + - osd.2 + - mon.c +tasks: +- install: +- cephadm: +- cephadm.apply: + specs: + - service_type: mon + service_id: foo + placement: + count: 3 + spec: + crush_locations: + host.a: + - datacenter=a + host.b: + - datacenter=b + - rack=2 + host.c: + - datacenter=a + - rack=3 +- cephadm.shell: + host.a: + - | + set -ex + # since we don't know the real hostnames before the test, the next + # bit is in order to replace the fake hostnames "host.a/b/c" with + # the actual names cephadm knows the host by within the mon spec + ceph orch host ls --format json | jq -r '.[] | .hostname' > realnames + echo $'host.a\nhost.b\nhost.c' > fakenames + echo $'a\nb\nc' > mon_ids + echo $'{datacenter=a}\n{datacenter=b,rack=2}\n{datacenter=a,rack=3}' > crush_locs + ceph orch ls --service-name mon --export > mon.yaml + MONSPEC=`cat mon.yaml` + echo "$MONSPEC" + while read realname <&3 && read fakename <&4; do + MONSPEC="${MONSPEC//$fakename/$realname}" + done 3<realnames 4<fakenames + echo "$MONSPEC" > mon.yaml + cat mon.yaml + # now the spec should have the real hostnames, so let's re-apply + ceph orch apply -i mon.yaml + sleep 90 + ceph orch ps --refresh + ceph orch ls --service-name mon --export > mon.yaml; ceph orch apply -i mon.yaml + sleep 90 + ceph mon dump + ceph mon dump --format json + # verify all the crush locations got set from "ceph mon dump" output + while read monid <&3 && read crushloc <&4; do + ceph mon dump --format json | jq --arg monid "$monid" --arg crushloc "$crushloc" -e '.mons | .[] | select(.name == $monid) | .crush_location == $crushloc' + done 3<mon_ids 4<crush_locs diff --git a/qa/suites/orch/rook/.qa b/qa/suites/orch/rook/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/% b/qa/suites/orch/rook/smoke/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/rook/smoke/% diff --git a/qa/suites/orch/rook/smoke/.qa b/qa/suites/orch/rook/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/0-distro/.qa b/qa/suites/orch/rook/smoke/0-distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/0-distro/ubuntu_20.04.yaml b/qa/suites/orch/rook/smoke/0-distro/ubuntu_20.04.yaml new file mode 120000 index 000000000..f62164f91 --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-distro/ubuntu_20.04.yaml @@ -0,0 +1 @@ +.qa/distros/container-hosts/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/0-kubeadm.yaml b/qa/suites/orch/rook/smoke/0-kubeadm.yaml new file mode 100644 index 000000000..33915f571 --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-kubeadm.yaml @@ -0,0 +1,2 @@ +tasks: +- kubeadm: diff --git a/qa/suites/orch/rook/smoke/0-nvme-loop.yaml b/qa/suites/orch/rook/smoke/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/1-rook.yaml b/qa/suites/orch/rook/smoke/1-rook.yaml new file mode 100644 index 000000000..8182845e9 --- /dev/null +++ b/qa/suites/orch/rook/smoke/1-rook.yaml @@ -0,0 +1,2 @@ +tasks: +- rook: diff --git a/qa/suites/orch/rook/smoke/2-workload/.qa b/qa/suites/orch/rook/smoke/2-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/2-workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/2-workload/none.yaml b/qa/suites/orch/rook/smoke/2-workload/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/rook/smoke/2-workload/none.yaml diff --git a/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml b/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml new file mode 100644 index 000000000..fd71605c8 --- /dev/null +++ b/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml @@ -0,0 +1,5 @@ +tasks: +- install: + host.a: +- radosbench: + clients: [client.a] diff --git a/qa/suites/orch/rook/smoke/cluster/.qa b/qa/suites/orch/rook/smoke/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/cluster/1-node.yaml b/qa/suites/orch/rook/smoke/cluster/1-node.yaml new file mode 100644 index 000000000..d18510f6e --- /dev/null +++ b/qa/suites/orch/rook/smoke/cluster/1-node.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + osd crush chooseleaf type: 0 + +roles: +- - host.a + - client.a diff --git a/qa/suites/orch/rook/smoke/cluster/3-node.yaml b/qa/suites/orch/rook/smoke/cluster/3-node.yaml new file mode 100644 index 000000000..d79a9f786 --- /dev/null +++ b/qa/suites/orch/rook/smoke/cluster/3-node.yaml @@ -0,0 +1,7 @@ +roles: +- - host.a + - client.a +- - host.b + - client.b +- - host.c + - client.c diff --git a/qa/suites/orch/rook/smoke/k8s/.qa b/qa/suites/orch/rook/smoke/k8s/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/k8s/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/k8s/1.21.yaml b/qa/suites/orch/rook/smoke/k8s/1.21.yaml new file mode 100644 index 000000000..9e57a477f --- /dev/null +++ b/qa/suites/orch/rook/smoke/k8s/1.21.yaml @@ -0,0 +1,3 @@ +overrides: + kubeadm: + version: "1.21" diff --git a/qa/suites/orch/rook/smoke/net/.qa b/qa/suites/orch/rook/smoke/net/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/net/calico.yaml b/qa/suites/orch/rook/smoke/net/calico.yaml new file mode 100644 index 000000000..7e838c6c8 --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/calico.yaml @@ -0,0 +1,3 @@ +overrides: + kubeadm: + pod_network: calico diff --git a/qa/suites/orch/rook/smoke/net/flannel.yaml b/qa/suites/orch/rook/smoke/net/flannel.yaml new file mode 100644 index 000000000..8a1a20691 --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/flannel.yaml @@ -0,0 +1,3 @@ +overrides: + kubeadm: + pod_network: flannel diff --git a/qa/suites/orch/rook/smoke/net/host.yaml b/qa/suites/orch/rook/smoke/net/host.yaml new file mode 100644 index 000000000..d25725afd --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/host.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + spec: + mon: + allowMultiplePerNode: false + network: + provider: host diff --git a/qa/suites/orch/rook/smoke/rook/.qa b/qa/suites/orch/rook/smoke/rook/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/rook/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/rook/1.7.2.yaml b/qa/suites/orch/rook/smoke/rook/1.7.2.yaml new file mode 100644 index 000000000..de96c5815 --- /dev/null +++ b/qa/suites/orch/rook/smoke/rook/1.7.2.yaml @@ -0,0 +1,4 @@ +overrides: + rook: + rook_image: rook/ceph:v1.7.2 + rook_branch: v1.7.2 diff --git a/qa/suites/orch/rook/smoke/rook/master.yaml b/qa/suites/orch/rook/smoke/rook/master.yaml new file mode 100644 index 000000000..72b1cec72 --- /dev/null +++ b/qa/suites/orch/rook/smoke/rook/master.yaml @@ -0,0 +1,3 @@ +overrides: + rook: + rook_image: rook/ceph:master diff --git a/qa/suites/perf-basic/% b/qa/suites/perf-basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/perf-basic/% diff --git a/qa/suites/perf-basic/.qa b/qa/suites/perf-basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/ceph.yaml b/qa/suites/perf-basic/ceph.yaml new file mode 100644 index 000000000..72f66cf5f --- /dev/null +++ b/qa/suites/perf-basic/ceph.yaml @@ -0,0 +1,23 @@ +meta: +- desc: | + perf-basic is a basic performance suite. + Must be run on bare-metal machines. + On VMs performance results will be inconsistent + and can't be compared across runs. + Run ceph on a single node. + Use xfs beneath the osds. + Setup rgw on client.0 + +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: + fs: xfs + wait-for-scrub: false + log-ignorelist: + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - overall HEALTH +- ssh_keys: diff --git a/qa/suites/perf-basic/objectstore/.qa b/qa/suites/perf-basic/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/objectstore/bluestore.yaml b/qa/suites/perf-basic/objectstore/bluestore.yaml new file mode 100644 index 000000000..699db42d7 --- /dev/null +++ b/qa/suites/perf-basic/objectstore/bluestore.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + diff --git a/qa/suites/perf-basic/settings/.qa b/qa/suites/perf-basic/settings/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/settings/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/settings/optimized.yaml b/qa/suites/perf-basic/settings/optimized.yaml new file mode 100644 index 000000000..8b53498db --- /dev/null +++ b/qa/suites/perf-basic/settings/optimized.yaml @@ -0,0 +1,78 @@ +meta: +- desc: | + Use debug level 0/0 for performance tests. + +overrides: + ceph: + conf: + mon: + debug mon: "0/0" + debug ms: "0/0" + debug paxos: "0/0" + osd: + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug osd: "0/0" + global: + auth client required: none + auth cluster required: none + auth service required: none + auth supported: none + + debug lockdep: "0/0" + debug context: "0/0" + debug crush: "0/0" + debug mds: "0/0" + debug mds balancer: "0/0" + debug mds locker: "0/0" + debug mds log: "0/0" + debug mds log expire: "0/0" + debug mds migrator: "0/0" + debug buffer: "0/0" + debug timer: "0/0" + debug filer: "0/0" + debug striper: "0/0" + debug objecter: "0/0" + debug rados: "0/0" + debug rbd: "0/0" + debug rbd mirror: "0/0" + debug rbd replay: "0/0" + debug journaler: "0/0" + debug objectcacher: "0/0" + debug client: "0/0" + debug osd: "0/0" + debug optracker: "0/0" + debug objclass: "0/0" + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug mon: "0/0" + debug monc: "0/0" + debug paxos: "0/0" + debug tp: "0/0" + debug auth: "0/0" + debug crypto: "0/0" + debug finisher: "0/0" + debug heartbeatmap: "0/0" + debug perfcounter: "0/0" + debug rgw: "0/0" + debug rgw sync: "0/0" + debug civetweb: "0/0" + debug javaclient: "0/0" + debug asok: "0/0" + debug throttle: "0/0" + debug refs: "0/0" + debug compressor: "0/0" + debug bluestore: "0/0" + debug bluefs: "0/0" + debug bdev: "0/0" + debug kstore: "0/0" + debug rocksdb: "0/0" + debug leveldb: "0/0" + debug memdb: "0/0" + debug fuse: "0/0" + debug mgr: "0/0" + debug mgrc: "0/0" + debug dpdk: "0/0" + debug eventtrace: "0/0" diff --git a/qa/suites/perf-basic/ubuntu_latest.yaml b/qa/suites/perf-basic/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/perf-basic/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/perf-basic/workloads/.qa b/qa/suites/perf-basic/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/workloads/client_endpoint_rbd_4K_rand_write.yaml b/qa/suites/perf-basic/workloads/client_endpoint_rbd_4K_rand_write.yaml new file mode 100644 index 000000000..1e9832b06 --- /dev/null +++ b/qa/suites/perf-basic/workloads/client_endpoint_rbd_4K_rand_write.yaml @@ -0,0 +1,32 @@ +meta: +- desc: | + Run librbdfio benchmark using cbt client endpoint for rbd. + 4K randwrite workload. + +tasks: +- cbt: + benchmarks: + fio: + client_endpoints: 'fiotest' + op_size: [4096] + time: 300 + mode: ['randwrite'] + norandommap: True + size: 4096 + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 256 + pgp_size: 256 + replication: 3 + + client_endpoints: + fiotest: + driver: 'librbd' diff --git a/qa/suites/perf-basic/workloads/fio_4K_rand_write.yaml b/qa/suites/perf-basic/workloads/fio_4K_rand_write.yaml new file mode 100644 index 000000000..0b1c492b8 --- /dev/null +++ b/qa/suites/perf-basic/workloads/fio_4K_rand_write.yaml @@ -0,0 +1,29 @@ +meta: +- desc: | + Run librbdfio benchmark using cbt. + 4K randwrite workload. + +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 300 + mode: ['randwrite'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 256 + pgp_size: 256 + replication: 3 diff --git a/qa/suites/perf-basic/workloads/radosbench_4K_write.yaml b/qa/suites/perf-basic/workloads/radosbench_4K_write.yaml new file mode 100644 index 000000000..d0a825bf5 --- /dev/null +++ b/qa/suites/perf-basic/workloads/radosbench_4K_write.yaml @@ -0,0 +1,28 @@ +meta: +- desc: | + Run radosbench benchmark using cbt. + 4K write workload. + +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 300 + write_only: true + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/powercycle/.qa b/qa/suites/powercycle/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/% b/qa/suites/powercycle/osd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/powercycle/osd/% diff --git a/qa/suites/powercycle/osd/.qa b/qa/suites/powercycle/osd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/clusters/.qa b/qa/suites/powercycle/osd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml b/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml new file mode 100644 index 000000000..2fbcd0180 --- /dev/null +++ b/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, mds.a, client.0] +- [osd.0] +- [osd.1] +- [osd.2] diff --git a/qa/suites/powercycle/osd/ignorelist_health.yaml b/qa/suites/powercycle/osd/ignorelist_health.yaml new file mode 100644 index 000000000..bce5e9588 --- /dev/null +++ b/qa/suites/powercycle/osd/ignorelist_health.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + log-ignorelist: + - \(MDS_TRIM\) + - \(MDS_SLOW_REQUEST\) + - MDS_SLOW_METADATA_IO + - Behind on trimming diff --git a/qa/suites/powercycle/osd/objectstore b/qa/suites/powercycle/osd/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/powercycle/osd/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/powercycle/.qa b/qa/suites/powercycle/osd/powercycle/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/powercycle/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/powercycle/default.yaml b/qa/suites/powercycle/osd/powercycle/default.yaml new file mode 100644 index 000000000..9e0ed4769 --- /dev/null +++ b/qa/suites/powercycle/osd/powercycle/default.yaml @@ -0,0 +1,26 @@ +tasks: +- install: + extra_system_packages: + deb: + - bison + - flex + - libelf-dev + - libssl-dev + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- thrashosds: + chance_down: 1.0 + powercycle: true + timeout: 600 diff --git a/qa/suites/powercycle/osd/supported-all-distro b/qa/suites/powercycle/osd/supported-all-distro new file mode 120000 index 000000000..ca82dde58 --- /dev/null +++ b/qa/suites/powercycle/osd/supported-all-distro @@ -0,0 +1 @@ +.qa/distros/supported-all-distro
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/tasks/.qa b/qa/suites/powercycle/osd/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml b/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml new file mode 100644 index 000000000..3b1a8920b --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +tasks: +- radosbench: + clients: [client.0] + time: 60 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml new file mode 100644 index 000000000..87f8f57cc --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + fuse_default_permissions: 0 +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml new file mode 100644 index 000000000..683d3f592 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml @@ -0,0 +1,7 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - fs/misc diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..9f3fa7b18 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + osd: + filestore flush min: 0 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..5908d951b --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml new file mode 100644 index 000000000..94031518e --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml @@ -0,0 +1,7 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml new file mode 100644 index 000000000..2cbb03c77 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + osd_pg_log_dups_tracked: 10000 + +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..cb9e2e019 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git 
a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml new file mode 100644 index 000000000..f3efafa2e --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + client: + ms_inject_delay_probability: 1 + ms_inject_delay_type: osd + ms_inject_delay_max: 5 + client_oc_max_dirty_age: 1 +tasks: +- ceph-fuse: +- exec: + client.0: + - dd if=/dev/zero of=./foo count=100 + - sleep 2 + - truncate --size 0 ./foo diff --git a/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml b/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..d2970908c --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + conf: + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/powercycle/osd/tasks/radosbench.yaml b/qa/suites/powercycle/osd/tasks/radosbench.yaml new file mode 100644 index 000000000..91573f907 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/radosbench.yaml @@ -0,0 +1,38 @@ +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 diff --git a/qa/suites/powercycle/osd/tasks/readwrite.yaml b/qa/suites/powercycle/osd/tasks/readwrite.yaml new file mode 100644 index 000000000..c53e52b08 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml b/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..aa82d973a --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml b/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml new file mode 100644 index 000000000..1ffe4e148 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/powercycle/osd/thrashosds-health.yaml b/qa/suites/powercycle/osd/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/powercycle/osd/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
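Note: the powercycle/osd task fragments above are scheduled like any other suite through teuthology. A minimal, illustrative invocation follows; the branch, machine type and filter value are placeholders and option spellings can differ between teuthology versions:

    # schedule the powercycle/osd suite, limited to jobs built from one fragment
    teuthology-suite --suite powercycle/osd \
        --ceph main \
        --machine-type smithi \
        --filter snaps-many-objects

The --filter option matches against the generated job description, so it can restrict a run to the jobs produced by a single fragment such as snaps-many-objects.yaml.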
\ No newline at end of file diff --git a/qa/suites/rados/.qa b/qa/suites/rados/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/% b/qa/suites/rados/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/basic/% diff --git a/qa/suites/rados/basic/.qa b/qa/suites/rados/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/ceph.yaml b/qa/suites/rados/basic/ceph.yaml new file mode 100644 index 000000000..c12a671f0 --- /dev/null +++ b/qa/suites/rados/basic/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- install: + extra_system_packages: + rpm: + - sqlite-devel + deb: + - sqlite3 +- ceph: diff --git a/qa/suites/rados/basic/clusters/+ b/qa/suites/rados/basic/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/basic/clusters/+ diff --git a/qa/suites/rados/basic/clusters/.qa b/qa/suites/rados/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/clusters/fixed-2.yaml b/qa/suites/rados/basic/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rados/basic/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/basic/clusters/openstack.yaml b/qa/suites/rados/basic/clusters/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/basic/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/basic/mon_election b/qa/suites/rados/basic/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/basic/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/basic/msgr b/qa/suites/rados/basic/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/basic/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/basic/msgr-failures/.qa b/qa/suites/rados/basic/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/msgr-failures/few.yaml b/qa/suites/rados/basic/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/basic/msgr-failures/many.yaml b/qa/suites/rados/basic/msgr-failures/many.yaml new file mode 100644 index 000000000..075d959a7 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 1500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/basic/objectstore b/qa/suites/rados/basic/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/basic/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/basic/rados.yaml b/qa/suites/rados/basic/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/basic/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/basic/supported-random-distro$ b/qa/suites/rados/basic/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rados/basic/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
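Note: most of the rados/basic entries above are symlinks (mode 120000) rather than regular files. Each suite directory carries a '.qa' link pointing one level up, and shared fragments are pulled in through that link instead of being copied. A minimal sketch of how such links are created when adding a suite directory, using paths taken from the hunks above:

    cd qa/suites/rados/basic
    ln -s ../.qa .qa                          # relative link back towards qa/
    ln -s .qa/config/rados.yaml rados.yaml    # reuse the shared rados config
    ln -s '.qa/distros/supported-random-distro$' 'supported-random-distro$'

Because the targets are relative, the links keep working wherever the tree is checked out, and a broken one can be spotted with 'find qa/suites -xtype l'.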
\ No newline at end of file diff --git a/qa/suites/rados/basic/tasks/.qa b/qa/suites/rados/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/tasks/libcephsqlite.yaml b/qa/suites/rados/basic/tasks/libcephsqlite.yaml new file mode 100644 index 000000000..12498fb15 --- /dev/null +++ b/qa/suites/rados/basic/tasks/libcephsqlite.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + debug cephsqlite: 20 + log-ignorelist: + - POOL_APP_NOT_ENABLED + - do not have an application enabled +tasks: +- exec: + client.0: + - ceph osd pool create cephsqlite + - ceph auth get-or-create client.libcephsqlite mon 'profile simple-rados-client-with-blocklist' osd 'allow rwx pool=cephsqlite' >> /etc/ceph/ceph.keyring +- exec: + client.0: + - ceph_test_libcephsqlite --id libcephsqlite --no-log-to-stderr +- workunit: + clients: + client.0: + - rados/test_libcephsqlite.sh cephsqlite + env: + CEPH_ARGS: --id libcephsqlite --no-log-to-stderr diff --git a/qa/suites/rados/basic/tasks/rados_api_tests.yaml b/qa/suites/rados/basic/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..f765663a3 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - but it is still running + - overall HEALTH_ + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + conf: + client: + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- workunit: + clients: + client.0: + - rados/test.sh + - rados/test_pool_quota.sh + diff --git a/qa/suites/rados/basic/tasks/rados_cls_all.yaml b/qa/suites/rados/basic/tasks/rados_cls_all.yaml new file mode 100644 index 000000000..8896ccb44 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_cls_all.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - \(PG_AVAILABILITY\) + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd_class_load_list: "*" + osd_class_default_list: "*" +tasks: +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/rados/basic/tasks/rados_python.yaml b/qa/suites/rados/basic/tasks/rados_python.yaml new file mode 100644 index 000000000..18e150c6c --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_python.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + install: + ceph: + extra_system_packages: + rpm: + - python3-pytest + deb: + - python3-pytest +tasks: +- workunit: + timeout: 1h + clients: + client.0: + - rados/test_python.sh diff --git a/qa/suites/rados/basic/tasks/rados_stress_watch.yaml b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml new file mode 100644 index 000000000..9ff0a4150 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + client.0: + - rados/stress_watch.sh diff --git a/qa/suites/rados/basic/tasks/rados_striper.yaml b/qa/suites/rados/basic/tasks/rados_striper.yaml new file mode 100644 index 000000000..cafd824b0 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_striper.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - ceph_test_rados_striper_api_io + - 
ceph_test_rados_striper_api_aio + - ceph_test_rados_striper_api_striping + diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml new file mode 100644 index 000000000..53effb42d --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + all: + - rados/load-gen-big.sh diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml new file mode 100644 index 000000000..847aedb21 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + all: + - rados/load-gen-mix.sh diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml new file mode 100644 index 000000000..b25392ffa --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + all: + - rados/load-gen-mostlyread.sh diff --git a/qa/suites/rados/basic/tasks/readwrite.yaml b/qa/suites/rados/basic/tasks/readwrite.yaml new file mode 100644 index 000000000..fc13e67f0 --- /dev/null +++ b/qa/suites/rados/basic/tasks/readwrite.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + osd: + osd_discard_disconnected_ops: false + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/rados/basic/tasks/repair_test.yaml b/qa/suites/rados/basic/tasks/repair_test.yaml new file mode 100644 index 000000000..383acc956 --- /dev/null +++ b/qa/suites/rados/basic/tasks/repair_test.yaml @@ -0,0 +1,32 @@ +overrides: + ceph: + wait-for-scrub: false + log-ignorelist: + - candidate had a stat error + - candidate had a read error + - deep-scrub 0 missing, 1 inconsistent objects + - deep-scrub 0 missing, 4 inconsistent objects + - deep-scrub [0-9]+ errors + - '!= omap_digest' + - '!= data_digest' + - repair 0 missing, 1 inconsistent objects + - repair 0 missing, 4 inconsistent objects + - repair [0-9]+ errors, [0-9]+ fixed + - scrub 0 missing, 1 inconsistent objects + - scrub [0-9]+ errors + - 'size 1 != size' + - attr name mismatch + - Regular scrub request, deep-scrub details will be lost + - candidate size [0-9]+ info size [0-9]+ mismatch + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + filestore debug inject read err: true + bluestore debug inject read err: true +tasks: +- repair_test: + diff --git a/qa/suites/rados/basic/tasks/scrub_test.yaml b/qa/suites/rados/basic/tasks/scrub_test.yaml new file mode 100644 index 000000000..424657343 --- /dev/null +++ b/qa/suites/rados/basic/tasks/scrub_test.yaml @@ -0,0 +1,31 @@ +overrides: + ceph: + wait-for-scrub: false + log-ignorelist: + - '!= data_digest' + - '!= omap_digest' + - '!= size' + - 'deep-scrub 0 missing, 1 inconsistent objects' + - 'deep-scrub [0-9]+ 
errors' + - 'repair 0 missing, 1 inconsistent objects' + - 'repair [0-9]+ errors, [0-9]+ fixed' + - 'shard [0-9]+ .* : missing' + - 'deep-scrub 1 missing, 1 inconsistent objects' + - 'does not match object info size' + - 'attr name mistmatch' + - 'deep-scrub 1 missing, 0 inconsistent objects' + - 'failed to pick suitable auth object' + - 'candidate size [0-9]+ info size [0-9]+ mismatch' + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OSD_SCRUB_ERRORS\) + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd deep scrub update digest min age: 0 + osd skip data digest: false +tasks: +- scrub_test: diff --git a/qa/suites/rados/cephadm/.qa b/qa/suites/rados/cephadm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/cephadm/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/osds b/qa/suites/rados/cephadm/osds new file mode 120000 index 000000000..ace6d7c06 --- /dev/null +++ b/qa/suites/rados/cephadm/osds @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/osds/
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/smoke b/qa/suites/rados/cephadm/smoke new file mode 120000 index 000000000..bb2347907 --- /dev/null +++ b/qa/suites/rados/cephadm/smoke @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/smoke
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/smoke-singlehost b/qa/suites/rados/cephadm/smoke-singlehost new file mode 120000 index 000000000..458d624e2 --- /dev/null +++ b/qa/suites/rados/cephadm/smoke-singlehost @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/smoke-singlehost/
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/workunits b/qa/suites/rados/cephadm/workunits new file mode 120000 index 000000000..d2ed9d78a --- /dev/null +++ b/qa/suites/rados/cephadm/workunits @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/workunits/
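Note: the rados/cephadm sub-suite adds no yaml of its own; it only symlinks whole directories out of orch/cephadm (osds, smoke, smoke-singlehost, workunits), so those jobs are also exercised as part of the rados suite. An illustrative way to see where such a link lands once the chained '.qa' links are resolved:

    readlink qa/suites/rados/cephadm/osds
    # -> .qa/suites/orch/cephadm/osds/
    readlink -f qa/suites/rados/cephadm/osds
    # -> <checkout>/qa/suites/orch/cephadm/osds, assuming the .qa chain is intact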
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/% b/qa/suites/rados/dashboard/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/dashboard/% diff --git a/qa/suites/rados/dashboard/.qa b/qa/suites/rados/dashboard/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/dashboard/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/0-single-container-host.yaml b/qa/suites/rados/dashboard/0-single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/rados/dashboard/0-single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/debug/.qa b/qa/suites/rados/dashboard/debug/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/dashboard/debug/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/debug/mgr.yaml b/qa/suites/rados/dashboard/debug/mgr.yaml new file mode 120000 index 000000000..651e5f8a8 --- /dev/null +++ b/qa/suites/rados/dashboard/debug/mgr.yaml @@ -0,0 +1 @@ +.qa/debug/mgr.yaml
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/mon_election b/qa/suites/rados/dashboard/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/dashboard/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/random-objectstore$ b/qa/suites/rados/dashboard/random-objectstore$ new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rados/dashboard/random-objectstore$ @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/tasks/.qa b/qa/suites/rados/dashboard/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/dashboard/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/tasks/dashboard.yaml b/qa/suites/rados/dashboard/tasks/dashboard.yaml new file mode 100644 index 000000000..e7622f8c3 --- /dev/null +++ b/qa/suites/rados/dashboard/tasks/dashboard.yaml @@ -0,0 +1,72 @@ +roles: +- [mgr.x, mon.a, mon.c, mds.a, mds.c, osd.0, client.0] +- [mgr.y, mgr.z, mon.b, mds.b, osd.1, osd.2, osd.3, client.1] + +overrides: + ceph: + conf: + osd: + osd mclock override recovery settings: true + mgr: + mon warn on pool no app: false + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(OSD_DOWN\) + - \(OSD_HOST_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + - \(OSDMAP_FLAGS\) + - \(OSD_FLAGS\) + - \(TELEMETRY_CHANGED\) + - pauserd,pausewr flag\(s\) set + - Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running + - evicting unresponsive client .+ + - MON_DOWN + - rgw: [client.0] + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.mgr.test_dashboard + - tasks.mgr.dashboard.test_api + - tasks.mgr.dashboard.test_auth + - tasks.mgr.dashboard.test_cephfs + - tasks.mgr.dashboard.test_cluster + - tasks.mgr.dashboard.test_cluster_configuration + - tasks.mgr.dashboard.test_crush_rule + - tasks.mgr.dashboard.test_erasure_code_profile + - tasks.mgr.dashboard.test_health + - tasks.mgr.dashboard.test_host + - tasks.mgr.dashboard.test_logs + - tasks.mgr.dashboard.test_mgr_module + - tasks.mgr.dashboard.test_monitor + - tasks.mgr.dashboard.test_motd + - tasks.mgr.dashboard.test_orchestrator + - tasks.mgr.dashboard.test_osd + - tasks.mgr.dashboard.test_perf_counters + - tasks.mgr.dashboard.test_pool + - tasks.mgr.dashboard.test_rbd + - tasks.mgr.dashboard.test_rbd_mirroring + - tasks.mgr.dashboard.test_requests + - tasks.mgr.dashboard.test_rgw + - tasks.mgr.dashboard.test_role + - tasks.mgr.dashboard.test_settings + - tasks.mgr.dashboard.test_summary + - tasks.mgr.dashboard.test_telemetry + - tasks.mgr.dashboard.test_user diff --git a/qa/suites/rados/dashboard/tasks/e2e.yaml b/qa/suites/rados/dashboard/tasks/e2e.yaml new file mode 100644 index 000000000..cb6ffb22f --- /dev/null +++ b/qa/suites/rados/dashboard/tasks/e2e.yaml @@ -0,0 +1,23 @@ +roles: +# 3 osd roles on host.a is required for cephadm task. It checks if the cluster is healthy. +# More daemons will be deployed on both hosts in e2e tests. +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +- - host.b + - client.1 +tasks: +- install: +- cephadm: +- workunit: + clients: + client.1: + - cephadm/create_iscsi_disks.sh +- workunit: + clients: + client.0: + - cephadm/test_dashboard_e2e.sh diff --git a/qa/suites/rados/mgr/% b/qa/suites/rados/mgr/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/mgr/% diff --git a/qa/suites/rados/mgr/.qa b/qa/suites/rados/mgr/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/clusters/+ b/qa/suites/rados/mgr/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/mgr/clusters/+ diff --git a/qa/suites/rados/mgr/clusters/.qa b/qa/suites/rados/mgr/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/clusters/2-node-mgr.yaml b/qa/suites/rados/mgr/clusters/2-node-mgr.yaml new file mode 120000 index 000000000..8a0b9123b --- /dev/null +++ b/qa/suites/rados/mgr/clusters/2-node-mgr.yaml @@ -0,0 +1 @@ +.qa/clusters/2-node-mgr.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/debug/.qa b/qa/suites/rados/mgr/debug/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/debug/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/debug/mgr.yaml b/qa/suites/rados/mgr/debug/mgr.yaml new file mode 120000 index 000000000..651e5f8a8 --- /dev/null +++ b/qa/suites/rados/mgr/debug/mgr.yaml @@ -0,0 +1 @@ +.qa/debug/mgr.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/.qa b/qa/suites/rados/mgr/mgr_ttl_cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/mgr_ttl_cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml b/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml new file mode 120000 index 000000000..d7db486dd --- /dev/null +++ b/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml @@ -0,0 +1 @@ +.qa/mgr_ttl_cache/disable.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml b/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml new file mode 120000 index 000000000..18286a656 --- /dev/null +++ b/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml @@ -0,0 +1 @@ +.qa/mgr_ttl_cache/enable.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mon_election b/qa/suites/rados/mgr/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/mgr/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/mgr/random-objectstore$ b/qa/suites/rados/mgr/random-objectstore$ new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/mgr/random-objectstore$ @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/mgr/supported-random-distro$ b/qa/suites/rados/mgr/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/mgr/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/mgr/tasks/.qa b/qa/suites/rados/mgr/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/tasks/crash.yaml b/qa/suites/rados/mgr/tasks/crash.yaml new file mode 100644 index 000000000..9d2ba535e --- /dev/null +++ b/qa/suites/rados/mgr/tasks/crash.yaml @@ -0,0 +1,18 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - \(RECENT_CRASH\) + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_crash diff --git a/qa/suites/rados/mgr/tasks/failover.yaml b/qa/suites/rados/mgr/tasks/failover.yaml new file mode 100644 index 000000000..6d1e0d557 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/failover.yaml @@ -0,0 +1,17 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_failover diff --git a/qa/suites/rados/mgr/tasks/insights.yaml b/qa/suites/rados/mgr/tasks/insights.yaml new file mode 100644 index 000000000..f7c82cf7f --- /dev/null +++ b/qa/suites/rados/mgr/tasks/insights.yaml @@ -0,0 +1,20 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(MGR_INSIGHTS_WARNING\) + - \(insights_health_check + - \(PG_ + - \(RECENT_CRASH\) + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_insights diff --git a/qa/suites/rados/mgr/tasks/module_selftest.yaml b/qa/suites/rados/mgr/tasks/module_selftest.yaml new file mode 100644 index 000000000..4403d9fff --- /dev/null +++ b/qa/suites/rados/mgr/tasks/module_selftest.yaml @@ -0,0 +1,28 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. 
+ wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - Reduced data availability + - Degraded data redundancy + - objects misplaced + - Synthetic exception in serve + - influxdb python module not found + - \(MGR_ZABBIX_ + - foo bar + - Failed to open Telegraf + - evicting unresponsive client + - 1 mgr modules have recently crashed \(RECENT_MGR_MODULE_CRASH\) + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_module_selftest + fail_on_skip: false diff --git a/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml new file mode 100644 index 000000000..de1d592df --- /dev/null +++ b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml @@ -0,0 +1,45 @@ +tasks: + - install: + - ceph: + wait-for-scrub: false + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + - check-counter: + counters: + mgr: + - name: "finisher-balancer.complete_latency.avgcount" + min: 1 + - name: "finisher-balancer.queue_len" + expected_val: 0 + - name: "finisher-crash.complete_latency.avgcount" + min: 2 + - name: "finisher-crash.queue_len" + expected_val: 0 + - name: "finisher-devicehealth.complete_latency.avgcount" + min: 1 + - name: "finisher-devicehealth.queue_len" + expected_val: 0 + - name: "finisher-iostat.complete_latency.avgcount" + min: 1 + - name: "finisher-iostat.queue_len" + expected_val: 0 + - name: "finisher-pg_autoscaler.complete_latency.avgcount" + min: 1 + - name: "finisher-pg_autoscaler.queue_len" + expected_val: 0 + - name: "finisher-progress.complete_latency.avgcount" + min: 2 + - name: "finisher-progress.queue_len" + expected_val: 0 + - name: "finisher-status.complete_latency.avgcount" + min: 2 + - name: "finisher-status.queue_len" + expected_val: 0 + - name: "finisher-telemetry.complete_latency.avgcount" + min: 2 + - name: "finisher-telemetry.queue_len" + expected_val: 0 + - workunit: + clients: + client.0: + - mgr/test_per_module_finisher.sh diff --git a/qa/suites/rados/mgr/tasks/progress.yaml b/qa/suites/rados/mgr/tasks/progress.yaml new file mode 100644 index 000000000..183a9a29a --- /dev/null +++ b/qa/suites/rados/mgr/tasks/progress.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + conf: + osd: + osd mclock profile: high_recovery_ops +tasks: + - install: + - ceph: + config: + global: + osd pool default size : 3 + osd pool default min size : 2 + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_WITH_FAILED_MDS\) + - \(FS_DEGRADED\) + - \(PG_ + - \(OSDMAP_FLAGS\) + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_progress diff --git a/qa/suites/rados/mgr/tasks/prometheus.yaml b/qa/suites/rados/mgr/tasks/prometheus.yaml new file mode 100644 index 000000000..fd0e23a35 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/prometheus.yaml @@ -0,0 +1,17 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. 
+ wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_prometheus diff --git a/qa/suites/rados/mgr/tasks/workunits.yaml b/qa/suites/rados/mgr/tasks/workunits.yaml new file mode 100644 index 000000000..a48274033 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/workunits.yaml @@ -0,0 +1,17 @@ +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - workunit: + clients: + client.0: + - mgr/test_localpool.sh diff --git a/qa/suites/rados/monthrash/% b/qa/suites/rados/monthrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/monthrash/% diff --git a/qa/suites/rados/monthrash/.qa b/qa/suites/rados/monthrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/.qa @@ -0,0 +1 @@ +../.qa/
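Note: several of the mgr task fragments above end in a workunit step (mgr/test_localpool.sh, mgr/test_per_module_finisher.sh). The workunit task checks the ceph repository out on the test node and runs the named script from qa/workunits on the listed client. Roughly, as a sketch only (the real task also exports CEPH_ID, a per-client scratch directory and other environment):

    # approximate effect of:  workunit: clients: client.0: [mgr/test_localpool.sh]
    git clone https://github.com/ceph/ceph.git
    cd ceph/qa/workunits
    ./mgr/test_localpool.sh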
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/ceph.yaml b/qa/suites/rados/monthrash/ceph.yaml new file mode 100644 index 000000000..8055fe372 --- /dev/null +++ b/qa/suites/rados/monthrash/ceph.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + conf: + client: + debug monc: 20 + debug ms: 1 + mon: + mon min osdmap epochs: 25 + paxos service trim min: 5 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + mon scrub inject crc mismatch: 0.01 + mon scrub inject missing keys: 0.05 +# thrashing monitors may make mgr have trouble w/ its keepalive + log-ignorelist: + - ScrubResult + - scrub mismatch + - overall HEALTH_ + - \(MGR_DOWN\) +# slow mons -> slow peering -> PG_AVAILABILITY + - \(PG_AVAILABILITY\) + - \(SLOW_OPS\) + - slow request +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/monthrash/clusters/.qa b/qa/suites/rados/monthrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/clusters/3-mons.yaml b/qa/suites/rados/monthrash/clusters/3-mons.yaml new file mode 100644 index 000000000..4b721ef80 --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/3-mons.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.0] +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/rados/monthrash/clusters/9-mons.yaml b/qa/suites/rados/monthrash/clusters/9-mons.yaml new file mode 100644 index 000000000..a2874c1d0 --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/9-mons.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mon.d, mon.e, osd.0, osd.1, osd.2] +- [mon.f, mon.g, mon.h, mon.i, mgr.x, osd.3, osd.4, osd.5, client.0] +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/rados/monthrash/mon_election b/qa/suites/rados/monthrash/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/monthrash/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/msgr b/qa/suites/rados/monthrash/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/monthrash/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/msgr-failures/.qa b/qa/suites/rados/monthrash/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/msgr-failures/few.yaml b/qa/suites/rados/monthrash/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml new file mode 100644 index 000000000..83b136518 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: mon + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 + mon client directed command retry: 5 + mgr: + debug monc: 10 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/monthrash/objectstore b/qa/suites/rados/monthrash/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/monthrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/rados.yaml b/qa/suites/rados/monthrash/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/monthrash/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/supported-random-distro$ b/qa/suites/rados/monthrash/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/monthrash/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/thrashers/.qa b/qa/suites/rados/monthrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml new file mode 100644 index 000000000..f4c98ae27 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + store_thrash: true + thrash_many: true diff --git a/qa/suites/rados/monthrash/thrashers/many.yaml b/qa/suites/rados/monthrash/thrashers/many.yaml new file mode 100644 index 000000000..2f5de97e3 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/many.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + mon client ping interval: 4 + mon client ping timeout: 12 +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 + thrash_many: true + freeze_mon_duration: 20 + freeze_mon_probability: 10 diff --git a/qa/suites/rados/monthrash/thrashers/one.yaml b/qa/suites/rados/monthrash/thrashers/one.yaml new file mode 100644 index 000000000..3a71edaf1 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/one.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 diff --git a/qa/suites/rados/monthrash/thrashers/sync-many.yaml b/qa/suites/rados/monthrash/thrashers/sync-many.yaml new file mode 100644 index 000000000..6bb25b7eb --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/sync-many.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + paxos min: 10 + paxos trim min: 10 +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + thrash_many: true diff --git a/qa/suites/rados/monthrash/thrashers/sync.yaml b/qa/suites/rados/monthrash/thrashers/sync.yaml new file mode 100644 index 000000000..30f133055 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/sync.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + paxos min: 10 + paxos trim min: 10 +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 diff --git a/qa/suites/rados/monthrash/workloads/.qa b/qa/suites/rados/monthrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml b/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml new file mode 100644 index 000000000..6e8dadaf0 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml @@ -0,0 +1,57 @@ +overrides: + ceph: + log-ignorelist: + - slow request + - overall HEALTH_ +tasks: +- exec: + client.0: + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel diff --git a/qa/suites/rados/monthrash/workloads/rados_5925.yaml b/qa/suites/rados/monthrash/workloads/rados_5925.yaml new file mode 100644 index 000000000..bf7eee534 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_5925.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ +tasks: +- exec: + client.0: + - ceph_test_rados_delete_pools_parallel --debug_objecter 20 --debug_ms 1 --debug_rados 20 --debug_monc 20 diff --git a/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml new file mode 100644 index 000000000..f14c2c2a6 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_FULL\) + - \(SLOW_OPS\) + - \(MON_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - slow request + conf: + global: + debug objecter: 20 + debug rados: 20 + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd 
class load list: "*" + osd class default list: "*" +tasks: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml new file mode 100644 index 000000000..372bf2561 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + mon: + mon debug extra checks: true + mon min osdmap epochs: 100 + mon osdmap full prune enabled: true + mon osdmap full prune min: 200 + mon osdmap full prune interval: 10 + mon osdmap full prune txsize: 100 + osd: + osd beacon report interval: 10 + log-ignorelist: + # setting/unsetting noup will trigger health warns, + # causing tests to fail due to health warns, even if + # the tests themselves are successful. + - \(OSDMAP_FLAGS\) +tasks: +- workunit: + clients: + client.0: + - mon/test_mon_osdmap_prune.sh diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml new file mode 100644 index 000000000..2f9729c92 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(PG_ + - \(MON_DOWN\) + - \(AUTH_BAD_CAPS\) +tasks: +- workunit: + clients: + client.0: + - mon/pool_ops.sh + - mon/crush_ops.sh + - mon/osd.sh + - mon/caps.sh + diff --git a/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml b/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..aa82d973a --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/multimon/% b/qa/suites/rados/multimon/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/multimon/% diff --git a/qa/suites/rados/multimon/.qa b/qa/suites/rados/multimon/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/.qa @@ -0,0 +1 @@ +../.qa/
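Note: the pool-create-delete workload above lists the same binary, ceph_test_rados_delete_pools_parallel, dozens of times under a single exec entry. The exec task runs its commands in order on client.0, so the effect is repeated back-to-back runs while the monitors are being thrashed; from an interactive shell this would look roughly like the following, with N standing in for the number of repetitions in the fragment:

    for i in $(seq 1 "$N"); do
        ceph_test_rados_delete_pools_parallel
    done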
\ No newline at end of file diff --git a/qa/suites/rados/multimon/clusters/.qa b/qa/suites/rados/multimon/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/multimon/clusters/21.yaml b/qa/suites/rados/multimon/clusters/21.yaml new file mode 100644 index 000000000..aae968665 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/21.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.d, mon.g, mon.j, mon.m, mon.p, mon.s] +- [mon.b, mon.e, mon.h, mon.k, mon.n, mon.q, mon.t, mgr.x] +- [mon.c, mon.f, mon.i, mon.l, mon.o, mon.r, mon.u] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/3.yaml b/qa/suites/rados/multimon/clusters/3.yaml new file mode 100644 index 000000000..11adef16b --- /dev/null +++ b/qa/suites/rados/multimon/clusters/3.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.c] +- [mon.b, mgr.x] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/6.yaml b/qa/suites/rados/multimon/clusters/6.yaml new file mode 100644 index 000000000..29c74dc7f --- /dev/null +++ b/qa/suites/rados/multimon/clusters/6.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.c, mon.e, mgr.x] +- [mon.b, mon.d, mon.f, mgr.y] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/9.yaml b/qa/suites/rados/multimon/clusters/9.yaml new file mode 100644 index 000000000..d51168556 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/9.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.d, mon.g] +- [mon.b, mon.e, mon.h, mgr.x] +- [mon.c, mon.f, mon.i] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/mon_election b/qa/suites/rados/multimon/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/multimon/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/multimon/msgr b/qa/suites/rados/multimon/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/multimon/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/multimon/msgr-failures/.qa b/qa/suites/rados/multimon/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/multimon/msgr-failures/few.yaml b/qa/suites/rados/multimon/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/multimon/msgr-failures/many.yaml b/qa/suites/rados/multimon/msgr-failures/many.yaml new file mode 100644 index 000000000..d47b466b9 --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/many.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 1000 + mon client directed command retry: 5 + mon mgr beacon grace: 90 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/multimon/no_pools.yaml b/qa/suites/rados/multimon/no_pools.yaml new file mode 100644 index 000000000..32ef2439f --- /dev/null +++ b/qa/suites/rados/multimon/no_pools.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force diff --git a/qa/suites/rados/multimon/objectstore b/qa/suites/rados/multimon/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/multimon/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/multimon/rados.yaml b/qa/suites/rados/multimon/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/multimon/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/multimon/supported-random-distro$ b/qa/suites/rados/multimon/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/multimon/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/multimon/tasks/.qa b/qa/suites/rados/multimon/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml new file mode 100644 index 000000000..691bd1efc --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml @@ -0,0 +1,12 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - slow request + - .*clock.*skew.* + - clocks not synchronized + - overall HEALTH_ + - \(MON_CLOCK_SKEW\) + - \(POOL_APP_NOT_ENABLED\) +- mon_clock_skew_check: + expect-skew: false diff --git a/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml new file mode 100644 index 000000000..079e1555b --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml @@ -0,0 +1,25 @@ +tasks: +- install: +- exec: + mon.b: + - sudo systemctl stop chronyd.service || true + - sudo systemctl stop systemd-timesync.service || true + - sudo systemctl stop ntpd.service || true + - sudo systemctl stop ntp.service || true + - date -u -s @$(expr $(date -u +%s) + 2) +- ceph: + wait-for-healthy: false + log-ignorelist: + - .*clock.*skew.* + - clocks not synchronized + - overall HEALTH_ + - \(MON_CLOCK_SKEW\) + - \(MGR_DOWN\) + - \(MON_DOWN\) + - \(PG_ + - \(SLOW_OPS\) + - No standby daemons available + - slow request + - \(POOL_APP_NOT_ENABLED\) +- mon_clock_skew_check: + expect-skew: true diff --git a/qa/suites/rados/multimon/tasks/mon_recovery.yaml b/qa/suites/rados/multimon/tasks/mon_recovery.yaml new file mode 100644 index 000000000..18e53e092 --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_recovery.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(PG_AVAILABILITY\) + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- mon_recovery: diff --git a/qa/suites/rados/objectstore/% b/qa/suites/rados/objectstore/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/objectstore/% diff --git a/qa/suites/rados/objectstore/.qa b/qa/suites/rados/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
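Note: in mon_clock_with_skews.yaml above, the skew is injected by first stopping every time-sync daemon on mon.b's host and then stepping its clock with 'date -u -s @$(expr $(date -u +%s) + 2)'. Broken apart, that one-liner does the following (sketch; setting the clock requires root on the target host):

    now=$(date -u +%s)          # current time on mon.b's host as a UNIX timestamp
    skewed=$(expr "$now" + 2)   # two seconds in the future
    date -u -s "@$skewed"       # step the clock, producing a ~2 second skew
                                # which the MON_CLOCK_SKEW check is then expected to report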
\ No newline at end of file diff --git a/qa/suites/rados/objectstore/backends/.qa b/qa/suites/rados/objectstore/backends/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/objectstore/backends/ceph_objectstore_tool.yaml b/qa/suites/rados/objectstore/backends/ceph_objectstore_tool.yaml new file mode 100644 index 000000000..1ae569b7e --- /dev/null +++ b/qa/suites/rados/objectstore/backends/ceph_objectstore_tool.yaml @@ -0,0 +1,26 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, client.0] +openstack: +- volumes: # attached to each instance + count: 6 + size: 10 # GB +tasks: +- install: +- ceph: + fs: xfs + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 + osd: + osd objectstore: bluestore + osd op queue: wpq + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) +- ceph_objectstore_tool: + objects: 20 diff --git a/qa/suites/rados/objectstore/backends/fusestore.yaml b/qa/suites/rados/objectstore/backends/fusestore.yaml new file mode 100644 index 000000000..1c34fcaea --- /dev/null +++ b/qa/suites/rados/objectstore/backends/fusestore.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- workunit: + clients: + all: + - objectstore/test_fuse.sh + diff --git a/qa/suites/rados/objectstore/backends/keyvaluedb.yaml b/qa/suites/rados/objectstore/backends/keyvaluedb.yaml new file mode 100644 index 000000000..efff8d379 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/keyvaluedb.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/kvtest && cd $TESTDIR/kvtest && ceph_test_keyvaluedb + - rm -rf $TESTDIR/kvtest diff --git a/qa/suites/rados/objectstore/backends/objectcacher-stress.yaml b/qa/suites/rados/objectstore/backends/objectcacher-stress.yaml new file mode 100644 index 000000000..ae0f8f381 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectcacher-stress.yaml @@ -0,0 +1,16 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - osdc/stress_objectcacher.sh diff --git a/qa/suites/rados/objectstore/backends/objectstore-bluestore-a.yaml b/qa/suites/rados/objectstore/backends/objectstore-bluestore-a.yaml new file mode 100644 index 000000000..b3c615bd6 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectstore-bluestore-a.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/archive/ostest && cd $TESTDIR/archive/ostest && ulimit -Sn 16384 && CEPH_ARGS="--no-log-to-stderr --log-file $TESTDIR/archive/ceph_test_objectstore.log --debug-bluestore 20" ceph_test_objectstore --gtest_filter=*/1:-*SyntheticMatrixC* --gtest_catch_exceptions=0 + - rm -rf $TESTDIR/archive/ostest diff --git a/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml b/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml new file mode 100644 index 000000000..eacb5ab44 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/archive/ostest && cd $TESTDIR/archive/ostest && ulimit -Sn 
16384 && CEPH_ARGS="--no-log-to-stderr --log-file $TESTDIR/archive/ceph_test_objectstore.log --debug-bluestore 20" ceph_test_objectstore --gtest_filter=*SyntheticMatrixC*/2 --gtest_catch_exceptions=0 + - rm -rf $TESTDIR/archive/ostest diff --git a/qa/suites/rados/objectstore/backends/objectstore-memstore.yaml b/qa/suites/rados/objectstore/backends/objectstore-memstore.yaml new file mode 100644 index 000000000..f60b6d59b --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectstore-memstore.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/archive/ostest && cd $TESTDIR/archive/ostest && ulimit -Sn 16384 && CEPH_ARGS="--no-log-to-stderr --log-file $TESTDIR/archive/ceph_test_objectstore.log --debug-bluestore 20" ceph_test_objectstore --gtest_filter=*/0 --gtest_catch_exceptions=0 + - rm -rf $TESTDIR/archive/ostest diff --git a/qa/suites/rados/objectstore/supported-random-distro$ b/qa/suites/rados/objectstore/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/objectstore/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
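Note: the three objectstore backend fragments above split one large googletest binary, ceph_test_objectstore, into separate jobs purely by filter: judging by the fragment names, '*/0' selects the memstore instantiation, '*/1' minus the SyntheticMatrixC cases covers one bluestore job, and '*SyntheticMatrixC*/2' the other. To see which parameterised instantiations a pattern actually selects on a given build (illustrative; requires the ceph test binaries):

    ceph_test_objectstore --gtest_list_tests | less
    # then re-run with the same filter a fragment uses, e.g.
    ceph_test_objectstore --gtest_filter='*/0' --gtest_catch_exceptions=0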
\ No newline at end of file diff --git a/qa/suites/rados/perf/% b/qa/suites/rados/perf/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/perf/% diff --git a/qa/suites/rados/perf/.qa b/qa/suites/rados/perf/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/perf/ceph.yaml b/qa/suites/rados/perf/ceph.yaml new file mode 100644 index 000000000..ca229dd46 --- /dev/null +++ b/qa/suites/rados/perf/ceph.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + global: + osd client message cap: 5000 +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: + fs: xfs + wait-for-scrub: false + log-ignorelist: + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - overall HEALTH + - \(POOL_APP_NOT_ENABLED\) +- ssh_keys: diff --git a/qa/suites/rados/perf/mon_election b/qa/suites/rados/perf/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/perf/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/perf/objectstore/.qa b/qa/suites/rados/perf/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/perf/objectstore/bluestore-basic-min-osd-mem-target.yaml b/qa/suites/rados/perf/objectstore/bluestore-basic-min-osd-mem-target.yaml new file mode 100644 index 000000000..32f596da1 --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-basic-min-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 2147483648 # min recommended is 2_G + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/objectstore/bluestore-bitmap.yaml b/qa/suites/rados/perf/objectstore/bluestore-bitmap.yaml new file mode 100644 index 000000000..b18e04bee --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/objectstore/bluestore-comp.yaml b/qa/suites/rados/perf/objectstore/bluestore-comp.yaml new file mode 100644 index 000000000..b408032fd --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-comp.yaml @@ -0,0 +1,23 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/suites/rados/perf/objectstore/bluestore-low-osd-mem-target.yaml b/qa/suites/rados/perf/objectstore/bluestore-low-osd-mem-target.yaml new file mode 100644 index 000000000..b2a49790b 
--- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-low-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 1610612736 # reduced to 1.5_G + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/objectstore/bluestore-stupid.yaml b/qa/suites/rados/perf/objectstore/bluestore-stupid.yaml new file mode 100644 index 000000000..ca811f131 --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/openstack.yaml b/qa/suites/rados/perf/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rados/perf/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rados/perf/scheduler/.qa b/qa/suites/rados/perf/scheduler/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/.qa @@ -0,0 +1 @@ +../.qa/
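The bluestore-*.yaml objectstore fragments above only override OSD configuration (allocator, osd memory target, lowered full ratios, discard, fsck on mount). A hedged way to confirm a running OSD actually picked the overrides up, assuming admin access and an example daemon id of osd.0 (neither is part of the fragments):

    # Read back a few of the overridden options from a live OSD.
    ceph daemon osd.0 config get bluestore_allocator       # bitmap vs stupid variants
    ceph daemon osd.0 config get osd_memory_target         # 2147483648 or 1610612736 above
    ceph daemon osd.0 config get bluestore_fsck_on_mount
    # The same knobs can be set cluster-wide through the mon config store:
    ceph config set osd osd_memory_target 2147483648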
\ No newline at end of file diff --git a/qa/suites/rados/perf/scheduler/dmclock_1Shard_16Threads.yaml b/qa/suites/rados/perf/scheduler/dmclock_1Shard_16Threads.yaml new file mode 100644 index 000000000..10388ad72 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/dmclock_1Shard_16Threads.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + osd op num shards: 1 + osd op num threads per shard: 16 + osd op queue: mclock_scheduler diff --git a/qa/suites/rados/perf/scheduler/dmclock_default_shards.yaml b/qa/suites/rados/perf/scheduler/dmclock_default_shards.yaml new file mode 100644 index 000000000..57a0ed912 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/dmclock_default_shards.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd op queue: mclock_scheduler diff --git a/qa/suites/rados/perf/scheduler/wpq_default_shards.yaml b/qa/suites/rados/perf/scheduler/wpq_default_shards.yaml new file mode 100644 index 000000000..25d358f27 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/wpq_default_shards.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd op queue: wpq diff --git a/qa/suites/rados/perf/settings/.qa b/qa/suites/rados/perf/settings/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/settings/.qa @@ -0,0 +1 @@ +../.qa/
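The scheduler fragments above pick the OSD op queue implementation (wpq or mclock_scheduler) and, in dmclock_1Shard_16Threads.yaml, reshape the shard layout. Since osd_op_queue generally only takes effect at OSD start, a quick sanity check against a running daemon (osd.0 is an example id, not from the fragments) might be:

    # Confirm which scheduler and shard layout an OSD is running with.
    ceph daemon osd.0 config get osd_op_queue
    ceph daemon osd.0 config get osd_op_num_shards
    ceph daemon osd.0 config get osd_op_num_threads_per_shard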
\ No newline at end of file diff --git a/qa/suites/rados/perf/settings/optimized.yaml b/qa/suites/rados/perf/settings/optimized.yaml new file mode 100644 index 000000000..dc4dcbb96 --- /dev/null +++ b/qa/suites/rados/perf/settings/optimized.yaml @@ -0,0 +1,74 @@ +overrides: + ceph: + conf: + mon: + debug mon: "0/0" + debug ms: "0/0" + debug paxos: "0/0" + osd: + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug osd: "0/0" + global: + auth client required: none + auth cluster required: none + auth service required: none + auth supported: none + + debug lockdep: "0/0" + debug context: "0/0" + debug crush: "0/0" + debug mds: "0/0" + debug mds balancer: "0/0" + debug mds locker: "0/0" + debug mds log: "0/0" + debug mds log expire: "0/0" + debug mds migrator: "0/0" + debug buffer: "0/0" + debug timer: "0/0" + debug filer: "0/0" + debug striper: "0/0" + debug objecter: "0/0" + debug rados: "0/0" + debug rbd: "0/0" + debug rbd mirror: "0/0" + debug rbd replay: "0/0" + debug journaler: "0/0" + debug objectcacher: "0/0" + debug client: "0/0" + debug osd: "0/0" + debug optracker: "0/0" + debug objclass: "0/0" + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug mon: "0/0" + debug monc: "0/0" + debug paxos: "0/0" + debug tp: "0/0" + debug auth: "0/0" + debug crypto: "0/0" + debug finisher: "0/0" + debug heartbeatmap: "0/0" + debug perfcounter: "0/0" + debug rgw: "0/0" + debug rgw sync: "0/0" + debug civetweb: "0/0" + debug javaclient: "0/0" + debug asok: "0/0" + debug throttle: "0/0" + debug refs: "0/0" + debug compressor: "0/0" + debug bluestore: "0/0" + debug bluefs: "0/0" + debug bdev: "0/0" + debug kstore: "0/0" + debug rocksdb: "0/0" + debug leveldb: "0/0" + debug memdb: "0/0" + debug fuse: "0/0" + debug mgr: "0/0" + debug mgrc: "0/0" + debug dpdk: "0/0" + debug eventtrace: "0/0" diff --git a/qa/suites/rados/perf/ubuntu_latest.yaml b/qa/suites/rados/perf/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rados/perf/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
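settings/optimized.yaml strips nearly all debug logging so that perf results are not dominated by logging overhead; each "0/0" value is a <log file level>/<in-memory gather level> pair. A hedged sketch of applying the same idea to a cluster outside the suite (the subsystem choice is illustrative):

    # Quiet a few of the noisier subsystems on all OSDs via the mon config store.
    ceph config set osd debug_osd 0/0
    ceph config set osd debug_ms 0/0
    ceph config set osd debug_bluestore 0/0
    # Or inject into already-running daemons without persisting anything:
    ceph tell osd.* injectargs '--debug-osd 0/0 --debug-ms 0/0'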
\ No newline at end of file diff --git a/qa/suites/rados/perf/workloads/.qa b/qa/suites/rados/perf/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/perf/workloads/fio_4K_rand_read.yaml b/qa/suites/rados/perf/workloads/fio_4K_rand_read.yaml new file mode 100644 index 000000000..d5ef33ab3 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4K_rand_read.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 60 + mode: ['randread'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4K_rand_rw.yaml b/qa/suites/rados/perf/workloads/fio_4K_rand_rw.yaml new file mode 100644 index 000000000..14d43f143 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4K_rand_rw.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 60 + mode: ['randrw'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4M_rand_read.yaml b/qa/suites/rados/perf/workloads/fio_4M_rand_read.yaml new file mode 100644 index 000000000..b07432243 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4M_rand_read.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4194304] + time: 60 + mode: ['randread'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4M_rand_rw.yaml b/qa/suites/rados/perf/workloads/fio_4M_rand_rw.yaml new file mode 100644 index 000000000..5fd6e2877 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4M_rand_rw.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4194304] + time: 60 + mode: ['randrw'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4M_rand_write.yaml b/qa/suites/rados/perf/workloads/fio_4M_rand_write.yaml new file mode 100644 index 000000000..2d9d83611 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4M_rand_write.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4194304] + time: 60 + mode: ['randwrite'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/radosbench_4K_rand_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4K_rand_read.yaml new file mode 100644 index 000000000..f1de9b41b --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4K_rand_read.yaml @@ -0,0 +1,24 @@ 
+tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + readmode: 'rand' + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4K_seq_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4K_seq_read.yaml new file mode 100644 index 000000000..8fb204a2f --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4K_seq_read.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4M_rand_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4M_rand_read.yaml new file mode 100644 index 000000000..cc1c74489 --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4M_rand_read.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4194304] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + readmode: 'rand' + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4M_seq_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4M_seq_read.yaml new file mode 100644 index 000000000..3ab55cf51 --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4M_seq_read.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4194304] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4M_write.yaml b/qa/suites/rados/perf/workloads/radosbench_4M_write.yaml new file mode 100644 index 000000000..f6a5d715c --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4M_write.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4194304] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: true + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_omap_write.yaml b/qa/suites/rados/perf/workloads/radosbench_omap_write.yaml new file mode 100644 index 000000000..5df4674d9 --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_omap_write.yaml @@ -0,0 +1,7 @@ +tasks: +- radosbench: + clients: [client.0] + write-omap: True + objectsize: 4096 + size: 4096 + time: 300 diff --git a/qa/suites/rados/perf/workloads/sample_fio.yaml 
b/qa/suites/rados/perf/workloads/sample_fio.yaml new file mode 100644 index 000000000..98411392d --- /dev/null +++ b/qa/suites/rados/perf/workloads/sample_fio.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 60 + mode: ['randwrite'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/sample_radosbench.yaml b/qa/suites/rados/perf/workloads/sample_radosbench.yaml new file mode 100644 index 000000000..e3dc47ae6 --- /dev/null +++ b/qa/suites/rados/perf/workloads/sample_radosbench.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: true + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/rest/% b/qa/suites/rados/rest/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/rest/% diff --git a/qa/suites/rados/rest/.qa b/qa/suites/rados/rest/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/rest/.qa @@ -0,0 +1 @@ +../.qa/
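The radosbench_* workloads above drive cbt's radosbench benchmark, which wraps the stock rados bench CLI: a timed write phase at a fixed op size, kept with --no-cleanup when a seq or rand read phase follows. Roughly, and outside cbt (the pool name is illustrative; only the 60 s runtime and op sizes come from the fragments):

    # Approximate shape of the write/seq/rand phases the workloads measure.
    ceph osd pool create bench 256 256
    rados -p bench bench 60 write -b 4096 --no-cleanup   # 4K writes, objects kept for the read phases
    rados -p bench bench 60 seq                          # sequential reads (radosbench_4K_seq_read.yaml)
    rados -p bench bench 60 rand                         # random reads (readmode: 'rand')
    rados -p bench cleanup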
\ No newline at end of file diff --git a/qa/suites/rados/rest/mgr-restful.yaml b/qa/suites/rados/rest/mgr-restful.yaml new file mode 100644 index 000000000..4901f401d --- /dev/null +++ b/qa/suites/rados/rest/mgr-restful.yaml @@ -0,0 +1,31 @@ +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, mds.a, client.a] +tasks: +- install: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(OSDMAP_FLAGS\) + - \(POOL_APP_NOT_ENABLED\) +- exec: + mon.a: + - ceph restful create-key admin + - ceph restful create-self-signed-cert + - ceph restful restart +- workunit: + clients: + client.a: + - rest/test-restful.sh +- exec: + mon.a: + - ceph restful delete-key admin + - ceph restful list-keys | jq ".admin" | grep null + diff --git a/qa/suites/rados/rest/supported-random-distro$ b/qa/suites/rados/rest/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/rest/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
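mgr-restful.yaml issues an API key and a self-signed certificate for the mgr restful module and then runs rest/test-restful.sh against it. A hedged sketch of poking the endpoint by hand; the module-enable step, port 8003 and the /server path reflect the module defaults as understood here and are assumptions, not taken from the fragment:

    # Issue a key and query the restful module directly.
    ceph mgr module enable restful
    ceph restful create-self-signed-cert
    KEY=$(ceph restful create-key admin)
    curl -k -u "admin:${KEY}" "https://$(hostname -f):8003/server"
    ceph restful delete-key admin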
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/% b/qa/suites/rados/singleton-bluestore/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/% diff --git a/qa/suites/rados/singleton-bluestore/.qa b/qa/suites/rados/singleton-bluestore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/all/.qa b/qa/suites/rados/singleton-bluestore/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/all/cephtool.yaml b/qa/suites/rados/singleton-bluestore/all/cephtool.yaml new file mode 100644 index 000000000..f86be3459 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/all/cephtool.yaml @@ -0,0 +1,48 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - had wrong client addr + - had wrong cluster addr + - must scrub before tier agent can activate + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping updates + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(SMALLER_PG_NUM\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(FS_WITH_FAILED_MDS\) + - \(FS_DEGRADED\) + - \(POOL_BACKFILLFULL\) + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(POOL_NEARFULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(AUTH_BAD_CAPS\) + - \(FS_INLINE_DATA_DEPRECATED\) + - \(MON_DOWN\) + - \(SLOW_OPS\) + - slow request +- workunit: + clients: + all: + - cephtool + - mon/pool_ops.sh diff --git a/qa/suites/rados/singleton-bluestore/mon_election b/qa/suites/rados/singleton-bluestore/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/mon_election @@ -0,0 +1 @@ +.qa/mon_election
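cephtool.yaml runs the large cephtool workunit plus mon/pool_ops.sh on a bluestore cluster, exercising the ceph CLI's pool and health handling. A minimal hedged sample of the pool lifecycle those scripts cover (the pool name is illustrative, and pool deletion is refused unless the mon flag below is set):

    # Pool create/rename/delete round trip of the kind mon/pool_ops.sh exercises.
    ceph osd pool create qa_pool 8
    ceph osd pool application enable qa_pool rados
    ceph osd pool rename qa_pool qa_pool_renamed
    ceph config set mon mon_allow_pool_delete true
    ceph osd pool delete qa_pool_renamed qa_pool_renamed --yes-i-really-really-mean-it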
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/msgr b/qa/suites/rados/singleton-bluestore/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/msgr-failures b/qa/suites/rados/singleton-bluestore/msgr-failures new file mode 120000 index 000000000..3ded97b94 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/msgr-failures @@ -0,0 +1 @@ +../singleton/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/.qa b/qa/suites/rados/singleton-bluestore/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..66cf2bc75 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-lz4.yaml b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-lz4.yaml new file mode 120000 index 000000000..da2e2598c --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-lz4.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-comp-lz4.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml new file mode 120000 index 000000000..f75b0e1b4 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-comp-snappy.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/rados.yaml b/qa/suites/rados/singleton-bluestore/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/supported-random-distro$ b/qa/suites/rados/singleton-bluestore/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/% b/qa/suites/rados/singleton-nomsgr/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/% diff --git a/qa/suites/rados/singleton-nomsgr/.qa b/qa/suites/rados/singleton-nomsgr/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/all/.qa b/qa/suites/rados/singleton-nomsgr/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml b/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml new file mode 100644 index 000000000..341a559f3 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml @@ -0,0 +1,28 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + log-ignorelist: + - MDS in read-only mode + - force file system read-only + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(OSDMAP_FLAGS\) + - \(OSD_FULL\) + - \(MDS_READ_ONLY\) + - \(POOL_FULL\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- rgw: + - client.0 +- exec: + client.0: + - ceph_test_admin_socket_output --all diff --git a/qa/suites/rados/singleton-nomsgr/all/balancer.yaml b/qa/suites/rados/singleton-nomsgr/all/balancer.yaml new file mode 100644 index 000000000..c42c5539d --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/balancer.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + fs: xfs + log-ignorelist: + - \(PG_AVAILABILITY\) + - \(POOL_APP_NOT_ENABLED\) +- cram: + clients: + client.0: + - src/test/cli-integration/balancer/misplaced.t diff --git a/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml b/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml new file mode 100644 index 000000000..fddbd0723 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml @@ -0,0 +1,55 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.1, osd.2, client.0, client.1] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 + debug client: 20 + debug mds: 20 + debug ms: 1 +- exec: + client.0: + - ceph osd pool create data_cache 4 + - ceph osd tier add cephfs_data data_cache + - ceph osd tier cache-mode data_cache writeback + - ceph osd tier set-overlay cephfs_data data_cache + - ceph osd pool set data_cache hit_set_type bloom + - ceph osd pool set data_cache hit_set_count 8 + - ceph osd pool set data_cache hit_set_period 3600 + - ceph osd pool set data_cache min_read_recency_for_promote 0 +- ceph-fuse: +- exec: + client.0: + - sudo chmod 777 $TESTDIR/mnt.0/ + - dd if=/dev/urandom of=$TESTDIR/mnt.0/foo bs=1M count=5 + - ls -al $TESTDIR/mnt.0/foo + - truncate --size 0 $TESTDIR/mnt.0/foo + - ls -al $TESTDIR/mnt.0/foo + - dd if=/dev/urandom of=$TESTDIR/mnt.0/foo bs=1M count=5 + - ls -al $TESTDIR/mnt.0/foo + - cp $TESTDIR/mnt.0/foo /tmp/foo + - sync + - rados -p data_cache ls - + - sleep 10 + - rados -p data_cache ls - + - rados -p data_cache cache-flush-evict-all + - rados -p data_cache ls - + - sleep 1 +- exec: + client.1: + - hexdump -C /tmp/foo | head + - hexdump -C $TESTDIR/mnt.1/foo | head + - cmp $TESTDIR/mnt.1/foo /tmp/foo diff --git a/qa/suites/rados/singleton-nomsgr/all/ceph-kvstore-tool.yaml b/qa/suites/rados/singleton-nomsgr/all/ceph-kvstore-tool.yaml new file mode 100644 index 000000000..6a8faa4a8 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/ceph-kvstore-tool.yaml @@ -0,0 
+1,25 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] + +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) + +tasks: +- install: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - cephtool/test_kvstore_tool.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml b/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml new file mode 100644 index 000000000..530dc42a7 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml @@ -0,0 +1,12 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- workunit: + clients: + all: + - post-file.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/crushdiff.yaml b/qa/suites/rados/singleton-nomsgr/all/crushdiff.yaml new file mode 100644 index 000000000..1639f0ed5 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/crushdiff.yaml @@ -0,0 +1,24 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, client.0] + +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) + - \(PG_DEGRADED\) + +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/test_crushdiff.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml new file mode 100644 index 000000000..b4ce5468a --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml @@ -0,0 +1,41 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 +- exec: + client.0: + - ceph osd pool create base-pool 4 + - ceph osd pool application enable base-pool rados + - ceph osd pool create cache-pool 4 + - ceph osd tier add base-pool cache-pool + - ceph osd tier cache-mode cache-pool writeback + - ceph osd tier set-overlay base-pool cache-pool + - dd if=/dev/urandom of=$TESTDIR/foo bs=1M count=1 + - rbd import --image-format 2 $TESTDIR/foo base-pool/bar + - rbd snap create base-pool/bar@snap + - rados -p base-pool cache-flush-evict-all + - rbd export base-pool/bar $TESTDIR/bar + - rbd export base-pool/bar@snap $TESTDIR/snap + - cmp $TESTDIR/foo $TESTDIR/bar + - cmp $TESTDIR/foo $TESTDIR/snap + - rm $TESTDIR/foo $TESTDIR/bar $TESTDIR/snap diff --git a/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml b/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml new file mode 100644 index 000000000..a06221449 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml @@ -0,0 +1,41 @@ +# verify #13098 fix +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +overrides: + ceph: + log-ignorelist: + - is full + - overall 
HEALTH_ + - \(POOL_FULL\) + - \(POOL_NEAR_FULL\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 +- exec: + client.0: + - ceph osd pool create ec-ca 1 1 + - ceph osd pool create ec 1 1 erasure default + - ceph osd pool application enable ec rados + - ceph osd tier add ec ec-ca + - ceph osd tier cache-mode ec-ca readproxy + - ceph osd tier set-overlay ec ec-ca + - ceph osd pool set ec-ca hit_set_type bloom + - ceph osd pool set-quota ec-ca max_bytes 20480000 + - ceph osd pool set-quota ec max_bytes 20480000 + - ceph osd pool set ec-ca target_max_bytes 20480000 + - timeout 30 rados -p ec-ca bench 30 write || true + - ceph osd pool set-quota ec-ca max_bytes 0 + - ceph osd pool set-quota ec max_bytes 0 diff --git a/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml b/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml new file mode 100644 index 000000000..5ed655324 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml @@ -0,0 +1,23 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6, osd.7, osd.8, osd.9, client.0] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + osd: +# we may land on ext4 + osd max object name len: 400 + osd max object namespace len: 64 + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_health_warnings.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/large-omap-object-warnings.yaml b/qa/suites/rados/singleton-nomsgr/all/large-omap-object-warnings.yaml new file mode 100644 index 000000000..e1e9d34ef --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/large-omap-object-warnings.yaml @@ -0,0 +1,30 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - \(OSDMAP_FLAGS\) + - \(OSD_FULL\) + - \(MDS_READ_ONLY\) + - \(POOL_APP_NOT_ENABLED\) + - large omap objects + - Large omap object found + - application not enabled + conf: + osd: + osd scrub backoff ratio: 0 + osd deep scrub large omap object value sum threshold: 8800000 + osd deep scrub large omap object key threshold: 20000 +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/test_large_omap_detection.py diff --git a/qa/suites/rados/singleton-nomsgr/all/lazy_omap_stats_output.yaml b/qa/suites/rados/singleton-nomsgr/all/lazy_omap_stats_output.yaml new file mode 100644 index 000000000..61c2fa663 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/lazy_omap_stats_output.yaml @@ -0,0 +1,18 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- exec: + client.0: + - ceph_test_lazy_omap_stats diff --git a/qa/suites/rados/singleton-nomsgr/all/librados_hello_world.yaml b/qa/suites/rados/singleton-nomsgr/all/librados_hello_world.yaml new file mode 100644 index 000000000..0c0a071e9 --- /dev/null 
+++ b/qa/suites/rados/singleton-nomsgr/all/librados_hello_world.yaml @@ -0,0 +1,24 @@ +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: + extra_packages: + deb: + - libradosstriper-dev + - librados-dev + - libradospp-dev + rpm: + - libradosstriper-devel + - librados-devel + - libradospp-devel +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- workunit: + clients: + all: + - rados/test_librados_build.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/msgr.yaml b/qa/suites/rados/singleton-nomsgr/all/msgr.yaml new file mode 100644 index 000000000..4eb376fcf --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/msgr.yaml @@ -0,0 +1,23 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- exec: + client.0: + - ceph_test_async_driver + - ceph_test_msgr +openstack: + - machine: + disk: 40 # GB + ram: 15000 # MB + cpus: 1 + volumes: # attached to each instance + count: 0 + size: 1 # GB +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + client: + debug ms: 20 diff --git a/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml new file mode 100644 index 000000000..15952b989 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml @@ -0,0 +1,51 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +- - osd.3 + - osd.4 + - osd.5 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd min pg log entries: 25 + osd max pg log entries: 100 + osd max object name len: 460 + osd max object namespace len: 64 +- exec: + client.0: + - sudo ceph osd pool create foo 64 + - sudo ceph osd pool application enable foo rados + - rados -p foo bench 60 write -b 1024 --no-cleanup + - sudo ceph osd pool set foo size 3 + - sudo ceph osd out 0 1 +- sleep: + duration: 60 +- exec: + client.0: + - sudo ceph osd in 0 1 +- sleep: + duration: 60 +- exec: + client.0: + - sudo ceph osd pool set foo size 2 +- sleep: + duration: 300 diff --git a/qa/suites/rados/singleton-nomsgr/all/osd_stale_reads.yaml b/qa/suites/rados/singleton-nomsgr/all/osd_stale_reads.yaml new file mode 100644 index 000000000..5beb2015f --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/osd_stale_reads.yaml @@ -0,0 +1,29 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +overrides: + ceph: + log-ignorelist: + - \(OSD_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + - application not enabled + - slow request + conf: + osd: + osd scrub backoff ratio: 0 + osd deep scrub large omap object value sum threshold: 8800000 + osd deep scrub large omap object key threshold: 20000 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- exec: + client.0: + - ceph_test_osd_stale_read diff --git a/qa/suites/rados/singleton-nomsgr/all/pool-access.yaml b/qa/suites/rados/singleton-nomsgr/all/pool-access.yaml new file mode 100644 index 000000000..26d548430 --- /dev/null +++ 
b/qa/suites/rados/singleton-nomsgr/all/pool-access.yaml @@ -0,0 +1,17 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_pool_access.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/recovery-unfound-found.yaml b/qa/suites/rados/singleton-nomsgr/all/recovery-unfound-found.yaml new file mode 100644 index 000000000..39788ddd2 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/recovery-unfound-found.yaml @@ -0,0 +1,60 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 +openstack: + - volumes: # attached to each instance + count: 2 + size: 20 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + fs: xfs + conf: + osd: + osd recovery sleep: .1 + osd objectstore: bluestore + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(OBJECT_ + - \(PG_ + - overall HEALTH +- exec: + osd.0: + - ceph osd pool create foo 32 + - ceph osd pool application enable foo foo + - rados -p foo bench 30 write -b 4096 --no-cleanup + - ceph osd set noup +- ceph.restart: + daemons: [osd.0] + wait-for-up: false + wait-for-healthy: false +- exec: + osd.0: + - sleep 5 + - rados -p foo bench 3 write -b 4096 --no-cleanup + - ceph osd unset noup + - sleep 10 + - ceph osd set noup +- ceph.restart: + daemons: [osd.1] + wait-for-up: false + wait-for-healthy: false +- exec: + osd.0: + - ceph osd out 0 + - sleep 10 + - ceph osd unset noup +- ceph.healthy: + wait-for-healthy: false # only wait for osds up and pgs clean, ignore misplaced +- exec: + osd.0: + - ceph osd in 0 +- ceph.healthy: diff --git a/qa/suites/rados/singleton-nomsgr/all/version-number-sanity.yaml b/qa/suites/rados/singleton-nomsgr/all/version-number-sanity.yaml new file mode 100644 index 000000000..daeeeef4e --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/version-number-sanity.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- workunit: + clients: + all: + - rados/version_number_sanity.sh diff --git a/qa/suites/rados/singleton-nomsgr/mon_election b/qa/suites/rados/singleton-nomsgr/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/mon_election @@ -0,0 +1 @@ +.qa/mon_election
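recovery-unfound-found.yaml above manufactures unfound objects by writing while a peer is held down with noup, then expects recovery to find them again once both OSDs are back in. If one wanted to watch that state by hand while the sequence runs (purely illustrative, not part of the fragment; the PG id is an example):

    # Inspect unfound objects during the noup/out dance.
    ceph health detail | grep -i unfound || true
    ceph pg dump_stuck unclean
    ceph pg 2.0 list_unfound     # substitute a PG id reported by the commands above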
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/rados.yaml b/qa/suites/rados/singleton-nomsgr/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/supported-random-distro$ b/qa/suites/rados/singleton-nomsgr/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/singleton/% b/qa/suites/rados/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton/% diff --git a/qa/suites/rados/singleton/.qa b/qa/suites/rados/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/.qa b/qa/suites/rados/singleton/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/admin-socket.yaml b/qa/suites/rados/singleton/all/admin-socket.yaml new file mode 100644 index 000000000..0d88e6f2a --- /dev/null +++ b/qa/suites/rados/singleton/all/admin-socket.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - client.a +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- admin_socket: + osd.0: + version: + git_version: + help: + config show: + config help: + config set bluestore_csum_type xxhash64: + perf dump: + perf schema: + get_heap_property tcmalloc.max_total_thread_cache_byte || dump_metrics memory: + set_heap_property tcmalloc.max_total_thread_cache_bytes 67108864 || dump_metrics memory: + set_heap_property tcmalloc.max_total_thread_cache_bytes 33554432 || dump_metrics memory: diff --git a/qa/suites/rados/singleton/all/backfill-toofull.yaml b/qa/suites/rados/singleton/all/backfill-toofull.yaml new file mode 100644 index 000000000..d53e9a3b2 --- /dev/null +++ b/qa/suites/rados/singleton/all/backfill-toofull.yaml @@ -0,0 +1,38 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force + log-ignorelist: + - Error + - overall HEALTH_ + - \(OBJECT_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_BACKFILLFULL\) + - \(POOL_NEARFULL\) + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd max pg log entries: 5 +- backfill_toofull: diff --git a/qa/suites/rados/singleton/all/deduptool.yaml b/qa/suites/rados/singleton/all/deduptool.yaml new file mode 100644 index 000000000..3a34cb309 --- /dev/null +++ b/qa/suites/rados/singleton/all/deduptool.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - had wrong client addr + - had wrong cluster addr + - reached quota + - overall HEALTH_ + - \(POOL_FULL\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_dedup_tool.sh diff --git a/qa/suites/rados/singleton/all/divergent_priors.yaml b/qa/suites/rados/singleton/all/divergent_priors.yaml new file mode 100644 index 000000000..81d68654a --- /dev/null +++ b/qa/suites/rados/singleton/all/divergent_priors.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB + +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- divergent_priors: diff --git a/qa/suites/rados/singleton/all/divergent_priors2.yaml b/qa/suites/rados/singleton/all/divergent_priors2.yaml new file mode 100644 index 000000000..baac3110c --- /dev/null +++ b/qa/suites/rados/singleton/all/divergent_priors2.yaml @@ -0,0 
+1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB + +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- divergent_priors2: diff --git a/qa/suites/rados/singleton/all/dump-stuck.yaml b/qa/suites/rados/singleton/all/dump-stuck.yaml new file mode 100644 index 000000000..f561795bd --- /dev/null +++ b/qa/suites/rados/singleton/all/dump-stuck.yaml @@ -0,0 +1,22 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- dump_stuck: diff --git a/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml new file mode 100644 index 000000000..31724f9e8 --- /dev/null +++ b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml @@ -0,0 +1,37 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force + log-ignorelist: + - \(OBJECT_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(SLOW_OPS\) + - deep-scrub + - missing + - overall HEALTH_ + - repair + - slow request + - unfound + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd max pg log entries: 5 +- ec_inconsistent_hinfo: diff --git a/qa/suites/rados/singleton/all/ec-lost-unfound.yaml b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml new file mode 100644 index 000000000..d397b005c --- /dev/null +++ b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml @@ -0,0 +1,30 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- ec_lost_unfound: diff --git a/qa/suites/rados/singleton/all/erasure-code-nonregression.yaml b/qa/suites/rados/singleton/all/erasure-code-nonregression.yaml new file mode 100644 index 000000000..e8201ee0b --- /dev/null +++ b/qa/suites/rados/singleton/all/erasure-code-nonregression.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + clients: + all: + - erasure-code/encode-decode-non-regression.sh diff --git a/qa/suites/rados/singleton/all/lost-unfound-delete.yaml b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml new file mode 100644 index 000000000..a6b68cd50 --- /dev/null +++ b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mon.b + - mon.c 
+ - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- rep_lost_unfound_delete: diff --git a/qa/suites/rados/singleton/all/lost-unfound.yaml b/qa/suites/rados/singleton/all/lost-unfound.yaml new file mode 100644 index 000000000..4010a5208 --- /dev/null +++ b/qa/suites/rados/singleton/all/lost-unfound.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- lost_unfound: diff --git a/qa/suites/rados/singleton/all/max-pg-per-osd.from-mon.yaml b/qa/suites/rados/singleton/all/max-pg-per-osd.from-mon.yaml new file mode 100644 index 000000000..e7eded31e --- /dev/null +++ b/qa/suites/rados/singleton/all/max-pg-per-osd.from-mon.yaml @@ -0,0 +1,30 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default size: 2 + osd: + mon max pg per osd : 2 + osd max pg per osd hard ratio : 1 + log-ignorelist: + - \(TOO_FEW_PGS\) + - \(PENDING_CREATING_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- osd_max_pg_per_osd: + test_create_from_mon: True + pg_num: 2 diff --git a/qa/suites/rados/singleton/all/max-pg-per-osd.from-primary.yaml b/qa/suites/rados/singleton/all/max-pg-per-osd.from-primary.yaml new file mode 100644 index 000000000..dc7c3f0f5 --- /dev/null +++ b/qa/suites/rados/singleton/all/max-pg-per-osd.from-primary.yaml @@ -0,0 +1,35 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default size: 2 + osd: + mon max pg per osd : 1 + osd max pg per osd hard ratio : 1 + log-ignorelist: + - \(TOO_FEW_PGS\) + - \(PG_ + - \(PENDING_CREATING_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- osd_max_pg_per_osd: + test_create_from_mon: False + pg_num: 1 + pool_size: 2 + from_primary: True diff --git a/qa/suites/rados/singleton/all/max-pg-per-osd.from-replica.yaml b/qa/suites/rados/singleton/all/max-pg-per-osd.from-replica.yaml new file mode 100644 index 000000000..ee0dae1d4 --- /dev/null +++ b/qa/suites/rados/singleton/all/max-pg-per-osd.from-replica.yaml @@ -0,0 +1,35 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default size: 2 + osd: + mon max pg per osd : 1 + osd max pg per osd hard ratio : 1 + 
log-ignorelist: + - \(TOO_FEW_PGS\) + - \(PG_ + - \(PENDING_CREATING_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- osd_max_pg_per_osd: + test_create_from_mon: False + pg_num: 1 + pool_size: 2 + from_primary: False diff --git a/qa/suites/rados/singleton/all/mon-auth-caps.yaml b/qa/suites/rados/singleton/all/mon-auth-caps.yaml new file mode 100644 index 000000000..264dc535a --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-auth-caps.yaml @@ -0,0 +1,21 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(AUTH_BAD_CAPS\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/auth_caps.sh + - mon/auth_key_rotation.sh diff --git a/qa/suites/rados/singleton/all/mon-config-key-caps.yaml b/qa/suites/rados/singleton/all/mon-config-key-caps.yaml new file mode 100644 index 000000000..c475a2080 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-config-key-caps.yaml @@ -0,0 +1,20 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(AUTH_BAD_CAPS\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/test_config_key_caps.sh diff --git a/qa/suites/rados/singleton/all/mon-config-keys.yaml b/qa/suites/rados/singleton/all/mon-config-keys.yaml new file mode 100644 index 000000000..117b6d055 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-config-keys.yaml @@ -0,0 +1,22 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- workunit: + clients: + all: + - mon/test_mon_config_key.py diff --git a/qa/suites/rados/singleton/all/mon-config.yaml b/qa/suites/rados/singleton/all/mon-config.yaml new file mode 100644 index 000000000..ab1eb81b0 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-config.yaml @@ -0,0 +1,24 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/config.sh diff --git a/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled b/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled new file mode 100644 index 000000000..e1f79c168 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled @@ -0,0 +1,154 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - osd.8 + - osd.9 + - osd.10 + - osd.11 + - osd.12 + - osd.13 + - osd.14 + - client.0 +openstack: + - volumes: # attached to each instance + count: 4 + size: 1 # GB +overrides: + ceph: + conf: + mon: + mon memory target: 134217728 # reduced to 128_M + rocksdb cache size: 67108864 # reduced to 64_M + mon osd cache size: 100000 + mon osd cache size min: 134217728 + osd: + osd memory target: 1610612736 # reduced to 1.5_G + osd objectstore: bluestore + debug bluestore: 20 + osd scrub min interval: 60 + osd scrub 
max interval: 120 + osd max backfills: 9 + +tasks: +- install: + branch: wip-sseshasa2-testing-2019-07-30-1825 # change as appropriate +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request +- interactive: +- parallel: + - log-mon-rss + - stress-tasks + - benchload +- exec: + client.0: + - "ceph_test_mon_memory_target 134217728" # mon memory target + - "ceph_test_mon_rss_usage 134217728" +log-mon-rss: +- background_exec: + client.0: + - while true + - do /usr/bin/ceph_test_log_rss_usage ceph-mon >> /var/log/ceph/ceph-mon-rss-usage.log + - sleep 300 # log rss usage every 5 mins. May be modified accordingly + - done +- exec: + client.0: + - sleep 37860 # sum total of the radosbench test times below plus 60 secs +benchload: # The total radosbench test below translates to 10.5 hrs +- full_sequential: + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 +stress-tasks: +- thrashosds: + op_delay: 1 + bdev_inject_crash: 1 + bdev_inject_crash_probability: .8 + chance_down: 80 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 3 + chance_thrash_pg_upmap_items: 3 + min_in: 2 diff --git a/qa/suites/rados/singleton/all/osd-backfill.yaml b/qa/suites/rados/singleton/all/osd-backfill.yaml new file mode 100644 index 000000000..92f5959b5 --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-backfill.yaml @@ -0,0 +1,29 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 +- osd_backfill: diff --git a/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml new file mode 100644 index 000000000..6d0955c73 --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml @@ -0,0 +1,31 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 
10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd_fast_fail_on_connection_refused: false +- osd_recovery.test_incomplete_pgs: diff --git a/qa/suites/rados/singleton/all/osd-recovery.yaml b/qa/suites/rados/singleton/all/osd-recovery.yaml new file mode 100644 index 000000000..9e33b3c39 --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-recovery.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_DEGRADED\) + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd pg log trim min: 0 + osd_fast_fail_on_connection_refused: false +- osd_recovery: diff --git a/qa/suites/rados/singleton/all/peer.yaml b/qa/suites/rados/singleton/all/peer.yaml new file mode 100644 index 000000000..f01473b0f --- /dev/null +++ b/qa/suites/rados/singleton/all/peer.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + config: + global: + osd pool default min size : 1 + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- peer: diff --git a/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml b/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml new file mode 100644 index 000000000..e4b48189f --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml @@ -0,0 +1,45 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - mon.b + - mon.c + - osd.4 + - osd.5 + - osd.6 + - osd.7 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- exec: + client.0: + - ceph progress off + +- workunit: + clients: + all: + - mon/pg_autoscaler.sh diff --git a/qa/suites/rados/singleton/all/pg-autoscaler.yaml b/qa/suites/rados/singleton/all/pg-autoscaler.yaml new file mode 100644 index 000000000..a03c2d521 --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-autoscaler.yaml @@ -0,0 +1,37 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - 
\(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/pg_autoscaler.sh diff --git a/qa/suites/rados/singleton/all/pg-removal-interruption.yaml b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml new file mode 100644 index 000000000..0dd0fb38d --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml @@ -0,0 +1,37 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - slow request + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- exec: + client.0: + - sudo ceph osd pool create foo 128 128 + - sudo ceph osd pool application enable foo rados + - sleep 5 + - sudo ceph tell osd.0 injectargs -- --osd-inject-failure-on-pg-removal + - sudo ceph osd pool delete foo foo --yes-i-really-really-mean-it +- ceph.wait_for_failure: [osd.0] +- exec: + client.0: + - sudo ceph osd down 0 +- ceph.restart: [osd.0] +- ceph.healthy: diff --git a/qa/suites/rados/singleton/all/radostool.yaml b/qa/suites/rados/singleton/all/radostool.yaml new file mode 100644 index 000000000..6a3998ed2 --- /dev/null +++ b/qa/suites/rados/singleton/all/radostool.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - had wrong client addr + - had wrong cluster addr + - reached quota + - overall HEALTH_ + - \(POOL_FULL\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_rados_tool.sh diff --git a/qa/suites/rados/singleton/all/random-eio.yaml b/qa/suites/rados/singleton/all/random-eio.yaml new file mode 100644 index 000000000..258ae90ed --- /dev/null +++ b/qa/suites/rados/singleton/all/random-eio.yaml @@ -0,0 +1,46 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - missing primary copy of + - objects unfound and apparently lost + - had a read error + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) + - \(PG_DEGRADED\) + - \(OSD_TOO_MANY_REPAIRS\) +- full_sequential: + - exec: + client.0: + - sudo ceph tell osd.1 injectargs -- --filestore_debug_random_read_err=0.33 + - sudo ceph tell osd.1 injectargs -- --bluestore_debug_random_read_err=0.33 + - sudo ceph osd pool create test 16 16 + - sudo ceph osd pool set test size 3 + - sudo ceph pg dump pgs --format=json-pretty + - radosbench: + clients: [client.0] + time: 360 + type: rand + objectsize: 1048576 + pool: test + create_pool: false + - exec: + client.0: + - sudo ceph tell osd.1 injectargs -- --filestore_debug_random_read_err=0.0 + - sudo ceph tell osd.1 injectargs -- --bluestore_debug_random_read_err=0.0 diff --git a/qa/suites/rados/singleton/all/rebuild-mondb.yaml b/qa/suites/rados/singleton/all/rebuild-mondb.yaml new file mode 100644 index 000000000..0c7de00c8 --- /dev/null +++ b/qa/suites/rados/singleton/all/rebuild-mondb.yaml @@ -0,0 +1,38 
@@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - no reply from + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + debug auth: 30 +- full_sequential: + - radosbench: + clients: [client.0] + time: 30 + - rebuild_mondb: + - radosbench: + clients: [client.0] + time: 30 diff --git a/qa/suites/rados/singleton/all/recovery-preemption.yaml b/qa/suites/rados/singleton/all/recovery-preemption.yaml new file mode 100644 index 000000000..ce51688e5 --- /dev/null +++ b/qa/suites/rados/singleton/all/recovery-preemption.yaml @@ -0,0 +1,60 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 3 + size: 20 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + osd: + osd recovery sleep: .1 + osd min pg log entries: 10 + osd max pg log entries: 1000 + osd_target_pg_log_entries_per_osd: 0 + osd pg log trim min: 10 + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(OBJECT_ + - \(PG_ + - \(SLOW_OPS\) + - overall HEALTH + - slow request +- exec: + osd.0: + - ceph osd pool create foo 128 + - ceph osd pool application enable foo foo + - sleep 5 +- ceph.healthy: +- exec: + osd.0: + - rados -p foo bench 30 write -b 4096 --no-cleanup + - ceph osd out 0 + - sleep 5 + - ceph osd set noup +- ceph.restart: + daemons: [osd.1] + wait-for-up: false + wait-for-healthy: false +- exec: + osd.0: + - rados -p foo bench 3 write -b 4096 --no-cleanup + - ceph osd unset noup + - sleep 10 + - for f in 0 1 2 3 ; do sudo ceph daemon osd.$f config set osd_recovery_sleep 0 ; sudo ceph daemon osd.$f config set osd_recovery_max_active 20 ; done +- ceph.healthy: +- exec: + osd.0: + - egrep '(defer backfill|defer recovery)' /var/log/ceph/ceph-osd.*.log diff --git a/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml b/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml new file mode 100644 index 000000000..41a011bd4 --- /dev/null +++ b/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml @@ -0,0 +1,19 @@ +roles: +- [mon.a, mgr.x] +- [osd.0, osd.1, osd.2, client.0] + +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_DEGRADED\) + - \(POOL_APP_NOT_ENABLED\) +- resolve_stuck_peering: + diff --git a/qa/suites/rados/singleton/all/test-crash.yaml b/qa/suites/rados/singleton/all/test-crash.yaml new file mode 100644 index 000000000..ec227cec7 --- /dev/null +++ b/qa/suites/rados/singleton/all/test-crash.yaml @@ -0,0 +1,21 @@ +roles: + - [client.0, mon.a, mgr.x, osd.0, osd.1, osd.2] + +tasks: + - install: + - ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - Reduced data availability + - OSD_.*DOWN + - \(RECENT_CRASH\) + - \(POOL_APP_NOT_ENABLED\) + - workunit: + clients: + client.0: + - rados/test_crash.sh + - ceph.restart: [osd.*] + - exec: + mon.a: + - find $TESTDIR/archive/coredump -type f -exec rm -f {} \; diff --git a/qa/suites/rados/singleton/all/test-noautoscale-flag.yaml 
b/qa/suites/rados/singleton/all/test-noautoscale-flag.yaml new file mode 100644 index 000000000..039300f7f --- /dev/null +++ b/qa/suites/rados/singleton/all/test-noautoscale-flag.yaml @@ -0,0 +1,40 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default pg autoscale mode: on + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - mon/test_noautoscale_flag.sh diff --git a/qa/suites/rados/singleton/all/thrash-backfill-full.yaml b/qa/suites/rados/singleton/all/thrash-backfill-full.yaml new file mode 100644 index 000000000..a9049560d --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-backfill-full.yaml @@ -0,0 +1,53 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +override: + ceph: + conf: + mon: + osd pool default size: 3 + osd min pg log entries: 5 + osd max pg log entries: 10 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - missing primary copy of + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(SLOW_OPS\) + - \(PG_ + - \(OBJECT_MISPLACED\) + - \(OSD_ + - \(OBJECT_ + - \(TOO_FEW_PGS\) + - \(POOL_BACKFILLFULL\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .75 + min_live: 5 + min_in: 5 + chance_test_backfill_full: .5 +- radosbench: + clients: [client.0] + time: 1800 + type: rand + objectsize: 1048576 diff --git a/qa/suites/rados/singleton/all/thrash-eio.yaml b/qa/suites/rados/singleton/all/thrash-eio.yaml new file mode 100644 index 000000000..52e0cc51e --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-eio.yaml @@ -0,0 +1,50 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +overrides: + ceph: + conf: + mon: + osd pool default size: 3 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - missing primary copy of + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(SLOW_OPS\) + - \(PG_ + - \(OBJECT_MISPLACED\) + - \(OSD_ + - \(OBJECT_ + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .5 + random_eio: .33 + min_live: 5 + min_in: 5 +- radosbench: + clients: [client.0] + time: 720 + type: rand + objectsize: 1048576 diff --git a/qa/suites/rados/singleton/all/thrash-rados/+ b/qa/suites/rados/singleton/all/thrash-rados/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados/+ diff --git a/qa/suites/rados/singleton/all/thrash-rados/.qa b/qa/suites/rados/singleton/all/thrash-rados/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ 
b/qa/suites/rados/singleton/all/thrash-rados/.qa @@ -0,0 +1 @@ +../.qa/
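The log-ignorelist entries used throughout the singleton tests above are regular expressions that teuthology matches against cluster log lines; deliberately unterminated prefixes such as \(PG_ match any health code beginning with PG_. A minimal overrides fragment in that style (the specific codes are illustrative, not tied to any one test):

    overrides:
      ceph:
        log-ignorelist:
          - overall HEALTH_
          - \(PG_                     # any PG_* health warning
          - \(POOL_APP_NOT_ENABLED\)  # one exact code, parentheses escaped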
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml b/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml new file mode 100644 index 000000000..b3b54e173 --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - \(POOL_APP_NOT_ENABLED\) +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .5 +- workunit: + clients: + all: + - rados/load-gen-mix-small.sh diff --git a/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml b/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml b/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml new file mode 100644 index 000000000..e58fb4ef4 --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml @@ -0,0 +1,71 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - slow request + - overall HEALTH_ + - \(CACHE_POOL_ + - \(POOL_APP_NOT_ENABLED\) +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 60 + - sudo ceph osd pool set cache target_max_objects 500 +- background_exec: + mon.a: + - while true + - do sleep 30 + - sudo ceph osd pool set cache cache_target_full_ratio .001 + - echo cache-try-flush-evict-all + - rados -p cache cache-try-flush-evict-all + - sleep 5 + - echo cache-flush-evict-all + - rados -p cache cache-flush-evict-all + - sleep 5 + - echo remove overlay + - sudo ceph osd tier remove-overlay base + - sleep 20 + # Disabled due to https://tracker.ceph.com/issues/46323 + #- echo add writeback overlay + #- sudo ceph osd tier cache-mode cache writeback + #- sudo ceph osd pool set cache cache_target_full_ratio .8 + #- sudo ceph osd tier set-overlay base cache + #- sleep 30 + #- sudo ceph osd tier cache-mode cache readproxy + - done +- rados: + clients: [client.0] + pools: [base] + max_seconds: 600 + ops: 400000 + objects: 10000 + size: 1024 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml new file mode 100644 index 000000000..04d3969b2 --- /dev/null +++ b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml @@ -0,0 +1,35 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + config: + global: + osd pool default min size : 1 + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_DEGRADED\) + - \(POOL_APP_NOT_ENABLED\) +- watch_notify_same_primary: + clients: [client.0] diff --git a/qa/suites/rados/singleton/mon_election b/qa/suites/rados/singleton/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/singleton/mon_election @@ -0,0 +1 @@ +.qa/mon_election
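The thrash-* singleton tests above follow a common pattern: a thrashosds task randomly takes OSDs down (chance_down), injects read errors (random_eio) or backfillfull conditions (chance_test_backfill_full) while keeping at least min_in OSDs in, and a client workload such as radosbench runs against the cluster in the meantime. A stripped-down sketch of that pattern, with illustrative values only:

    tasks:
    - thrashosds:
        op_delay: 30          # pacing between thrash operations
        clean_interval: 120   # periodically let the cluster go clean again
        chance_down: .5       # probability of taking an OSD down
        random_eio: .33       # inject random read errors while thrashing
        min_in: 5             # keep at least this many OSDs "in"
    - radosbench:
        clients: [client.0]
        time: 720
        type: rand
        objectsize: 1048576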
\ No newline at end of file diff --git a/qa/suites/rados/singleton/msgr b/qa/suites/rados/singleton/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/singleton/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/singleton/msgr-failures/.qa b/qa/suites/rados/singleton/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton/msgr-failures/few.yaml b/qa/suites/rados/singleton/msgr-failures/few.yaml new file mode 100644 index 000000000..8fd638744 --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME + - \(MON_DOWN\) diff --git a/qa/suites/rados/singleton/msgr-failures/many.yaml b/qa/suites/rados/singleton/msgr-failures/many.yaml new file mode 100644 index 000000000..206da3ec1 --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/many.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 1000 + mon mgr beacon grace: 90 + mon client hunt interval max multiple: 2 + mon client directed command retry: 5 + mgr: + debug monc: 10 + log-ignorelist: + - \(OSD_SLOW_PING_TIME + - \(MON_DOWN\) diff --git a/qa/suites/rados/singleton/msgr-failures/none.yaml b/qa/suites/rados/singleton/msgr-failures/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/none.yaml diff --git a/qa/suites/rados/singleton/objectstore b/qa/suites/rados/singleton/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/singleton/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
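In the msgr-failures facets above, 'ms inject socket failures: N' makes the messenger inject a socket failure roughly once per N messages, so many.yaml (1000) is considerably more aggressive than few.yaml (5000), which is presumably why it also raises the mgr beacon grace and caps the mon client hunt interval multiple. The active value can be checked on a live daemon through the admin socket; a sketch, assuming osd.0 is co-located with mon.a as in most of the singleton clusters above:

    tasks:
    - exec:
        mon.a:
          - sudo ceph daemon osd.0 config get ms_inject_socket_failures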
\ No newline at end of file diff --git a/qa/suites/rados/singleton/rados.yaml b/qa/suites/rados/singleton/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/singleton/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton/supported-random-distro$ b/qa/suites/rados/singleton/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/singleton/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/standalone/% b/qa/suites/rados/standalone/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/standalone/% diff --git a/qa/suites/rados/standalone/.qa b/qa/suites/rados/standalone/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/standalone/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/standalone/supported-random-distro$ b/qa/suites/rados/standalone/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/standalone/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/standalone/workloads/.qa b/qa/suites/rados/standalone/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/standalone/workloads/c2c.yaml b/qa/suites/rados/standalone/workloads/c2c.yaml new file mode 100644 index 000000000..9a0dfce94 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/c2c.yaml @@ -0,0 +1,18 @@ +arch: x86_64 +roles: +- - mon.a + - mgr.x + - osd.0 + - client.0 +tasks: +- install: + extra_system_packages: + rpm: + - perf + deb: + - linux-tools-generic +- workunit: + basedir: qa/standalone + clients: + all: + - c2c diff --git a/qa/suites/rados/standalone/workloads/crush.yaml b/qa/suites/rados/standalone/workloads/crush.yaml new file mode 100644 index 000000000..a62a0dd81 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/crush.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - crush diff --git a/qa/suites/rados/standalone/workloads/erasure-code.yaml b/qa/suites/rados/standalone/workloads/erasure-code.yaml new file mode 100644 index 000000000..7d79753ce --- /dev/null +++ b/qa/suites/rados/standalone/workloads/erasure-code.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - erasure-code diff --git a/qa/suites/rados/standalone/workloads/mgr.yaml b/qa/suites/rados/standalone/workloads/mgr.yaml new file mode 100644 index 000000000..997fae865 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/mgr.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - mgr diff --git a/qa/suites/rados/standalone/workloads/misc.yaml b/qa/suites/rados/standalone/workloads/misc.yaml new file mode 100644 index 000000000..4aa9ee27e --- /dev/null +++ b/qa/suites/rados/standalone/workloads/misc.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - misc diff --git a/qa/suites/rados/standalone/workloads/mon-stretch.yaml b/qa/suites/rados/standalone/workloads/mon-stretch.yaml new file mode 100644 index 000000000..d039126c5 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/mon-stretch.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - mon-stretch
\ No newline at end of file diff --git a/qa/suites/rados/standalone/workloads/mon.yaml b/qa/suites/rados/standalone/workloads/mon.yaml new file mode 100644 index 000000000..c19606f42 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/mon.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - mon diff --git a/qa/suites/rados/standalone/workloads/osd-backfill.yaml b/qa/suites/rados/standalone/workloads/osd-backfill.yaml new file mode 100644 index 000000000..b61e27289 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/osd-backfill.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - osd-backfill diff --git a/qa/suites/rados/standalone/workloads/osd.yaml b/qa/suites/rados/standalone/workloads/osd.yaml new file mode 100644 index 000000000..e28b52210 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/osd.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - osd diff --git a/qa/suites/rados/standalone/workloads/scrub.yaml b/qa/suites/rados/standalone/workloads/scrub.yaml new file mode 100644 index 000000000..7f6fad406 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/scrub.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - scrub diff --git a/qa/suites/rados/thrash-erasure-code-big/% b/qa/suites/rados/thrash-erasure-code-big/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/% diff --git a/qa/suites/rados/thrash-erasure-code-big/.qa b/qa/suites/rados/thrash-erasure-code-big/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/.qa @@ -0,0 +1 @@ +../.qa/
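The standalone workloads above all share one shape: a workunit task with basedir qa/standalone whose client entry names a directory, and every shell script under that directory is then run on the client node. An equivalent fragment that targets a single script rather than a whole directory (the script path is an assumption, chosen for illustration):

    tasks:
    - install:
    - workunit:
        basedir: qa/standalone
        clients:
          all:
            - scrub/osd-scrub-repair.sh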
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/ceph.yaml b/qa/suites/rados/thrash-erasure-code-big/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/+ b/qa/suites/rados/thrash-erasure-code-big/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/+ diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/.qa b/qa/suites/rados/thrash-erasure-code-big/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/12-osds.yaml b/qa/suites/rados/thrash-erasure-code-big/cluster/12-osds.yaml new file mode 100644 index 000000000..1c45ee352 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/12-osds.yaml @@ -0,0 +1,4 @@ +roles: +- [osd.0, osd.1, osd.2, osd.3, client.0, mon.a] +- [osd.4, osd.5, osd.6, osd.7, mon.b, mgr.x] +- [osd.8, osd.9, osd.10, osd.11, mon.c] diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/openstack.yaml b/qa/suites/rados/thrash-erasure-code-big/cluster/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/thrash-erasure-code-big/mon_election b/qa/suites/rados/thrash-erasure-code-big/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/msgr-failures b/qa/suites/rados/thrash-erasure-code-big/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/objectstore b/qa/suites/rados/thrash-erasure-code-big/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/rados.yaml b/qa/suites/rados/thrash-erasure-code-big/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/recovery-overrides b/qa/suites/rados/thrash-erasure-code-big/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-big/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/.qa b/qa/suites/rados/thrash-erasure-code-big/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/careful.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/careful.yaml new file mode 100644 index 000000000..df0a14500 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/careful.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/default.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/default.yaml new file mode 100644 index 000000000..09b6c1782 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/default.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml new file mode 100644 index 000000000..a36155609 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + osd pool default ec fast read: true + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/mapgap.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/mapgap.yaml new file mode 100644 index 000000000..6cf4dc930 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/mapgap.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + mon min osdmap epochs: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_test_map_discontinuity: 0.5 + min_in: 8 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml new file mode 100644 index 000000000..794e994f2 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - 
\(POOL_APP_NOT_ENABLED\) +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + min_in: 8 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/pggrow.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/pggrow.yaml new file mode 100644 index 000000000..15be6b43b --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/pggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + min_in: 8 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
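Among the thrash-erasure-code-big thrashers above, fastread.yaml sets 'osd pool default ec fast read: true'; with fast read the primary issues reads to all shards of an EC object and answers as soon as enough shards have arrived to decode, trading extra backend reads for lower tail latency. The same behaviour can also be toggled per pool after creation; a small sketch with a hypothetical pool name:

    tasks:
    - exec:
        client.0:
          - sudo ceph osd pool set ecpool fast_read 1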
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/workloads/.qa b/qa/suites/rados/thrash-erasure-code-big/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml new file mode 120000 index 000000000..c18bec161 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml new file mode 120000 index 000000000..d66fd7960 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/% b/qa/suites/rados/thrash-erasure-code-isa/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/% diff --git a/qa/suites/rados/thrash-erasure-code-isa/.qa b/qa/suites/rados/thrash-erasure-code-isa/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/arch/.qa b/qa/suites/rados/thrash-erasure-code-isa/arch/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/arch/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml b/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml new file mode 100644 index 000000000..c2409f5d0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml @@ -0,0 +1 @@ +arch: x86_64 diff --git a/qa/suites/rados/thrash-erasure-code-isa/ceph.yaml b/qa/suites/rados/thrash-erasure-code-isa/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/clusters b/qa/suites/rados/thrash-erasure-code-isa/clusters new file mode 120000 index 000000000..7aac47be3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/clusters @@ -0,0 +1 @@ +../thrash/clusters
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/mon_election b/qa/suites/rados/thrash-erasure-code-isa/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/msgr-failures b/qa/suites/rados/thrash-erasure-code-isa/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/objectstore b/qa/suites/rados/thrash-erasure-code-isa/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/rados.yaml b/qa/suites/rados/thrash-erasure-code-isa/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/recovery-overrides b/qa/suites/rados/thrash-erasure-code-isa/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-isa/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/thrashers b/qa/suites/rados/thrash-erasure-code-isa/thrashers new file mode 120000 index 000000000..f461dadc3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/thrashers @@ -0,0 +1 @@ +../thrash/thrashers
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/workloads/.qa b/qa/suites/rados/thrash-erasure-code-isa/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml b/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml new file mode 120000 index 000000000..19342b9d8 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/% b/qa/suites/rados/thrash-erasure-code-overwrites/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/% diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/.qa b/qa/suites/rados/thrash-erasure-code-overwrites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml new file mode 120000 index 000000000..635085f7f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../thrash-erasure-code/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/ceph.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/clusters b/qa/suites/rados/thrash-erasure-code-overwrites/clusters new file mode 120000 index 000000000..646ea04cd --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/clusters @@ -0,0 +1 @@ +../thrash-erasure-code/clusters
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/fast b/qa/suites/rados/thrash-erasure-code-overwrites/fast new file mode 120000 index 000000000..6170b30e0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/fast @@ -0,0 +1 @@ +../thrash-erasure-code/fast
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/mon_election b/qa/suites/rados/thrash-erasure-code-overwrites/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/msgr-failures b/qa/suites/rados/thrash-erasure-code-overwrites/msgr-failures new file mode 120000 index 000000000..70c9ca130 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/msgr-failures @@ -0,0 +1 @@ +../thrash-erasure-code/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/rados.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/rados.yaml new file mode 120000 index 000000000..017df6f60 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/rados.yaml @@ -0,0 +1 @@ +../thrash-erasure-code/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/recovery-overrides b/qa/suites/rados/thrash-erasure-code-overwrites/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-overwrites/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/thrashers b/qa/suites/rados/thrash-erasure-code-overwrites/thrashers new file mode 120000 index 000000000..40ff82cf7 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/thrashers @@ -0,0 +1 @@ +../thrash-erasure-code/thrashers
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/.qa b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-pool-snaps-few-objects-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-pool-snaps-few-objects-overwrites.yaml new file mode 100644 index 000000000..d2ad70a57 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-pool-snaps-few-objects-overwrites.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + ec_pool: true + erasure_code_use_overwrites: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-fast-read-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-fast-read-overwrites.yaml new file mode 100644 index 000000000..b3f831b77 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-fast-read-overwrites.yaml @@ -0,0 +1,29 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + erasure_code_use_overwrites: true + fast_read: true + op_weights: + read: 100 + write: 100 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-overwrites.yaml new file mode 100644 index 000000000..9baacef48 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-overwrites.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + erasure_code_use_overwrites: true + op_weights: + read: 100 + write: 100 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-snaps-few-objects-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-snaps-few-objects-overwrites.yaml new file mode 100644 index 000000000..b7c538199 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-snaps-few-objects-overwrites.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + erasure_code_use_overwrites: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash-erasure-code-shec/% b/qa/suites/rados/thrash-erasure-code-shec/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ 
b/qa/suites/rados/thrash-erasure-code-shec/% diff --git a/qa/suites/rados/thrash-erasure-code-shec/.qa b/qa/suites/rados/thrash-erasure-code-shec/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/.qa @@ -0,0 +1 @@ +../.qa/
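The *-overwrites workloads above use the rados task's erasure_code_use_overwrites option together with the experimental-features override and disable_objectstore_tool_tests; overwrites on EC pools presumably map to the per-pool allow_ec_overwrites flag and expect BlueStore OSDs. A hand-rolled equivalent, with an illustrative pool name and PG count:

    tasks:
    - exec:
        client.0:
          - sudo ceph osd pool create ecpool 16 16 erasure
          - sudo ceph osd pool set ecpool allow_ec_overwrites true
          - sudo ceph osd pool application enable ecpool rados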
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/ceph.yaml b/qa/suites/rados/thrash-erasure-code-shec/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/+ b/qa/suites/rados/thrash-erasure-code-shec/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/+ diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/.qa b/qa/suites/rados/thrash-erasure-code-shec/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/fixed-4.yaml b/qa/suites/rados/thrash-erasure-code-shec/clusters/fixed-4.yaml new file mode 120000 index 000000000..aa8830071 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/fixed-4.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-4.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/openstack.yaml b/qa/suites/rados/thrash-erasure-code-shec/clusters/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/thrash-erasure-code-shec/mon_election b/qa/suites/rados/thrash-erasure-code-shec/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/msgr-failures b/qa/suites/rados/thrash-erasure-code-shec/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/objectstore b/qa/suites/rados/thrash-erasure-code-shec/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/rados.yaml b/qa/suites/rados/thrash-erasure-code-shec/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/recovery-overrides b/qa/suites/rados/thrash-erasure-code-shec/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-shec/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashers/.qa b/qa/suites/rados/thrash-erasure-code-shec/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashers/careful.yaml b/qa/suites/rados/thrash-erasure-code-shec/thrashers/careful.yaml new file mode 100644 index 000000000..e18379b5f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashers/careful.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml b/qa/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml new file mode 100644 index 000000000..00c8689d4 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/workloads/.qa b/qa/suites/rados/thrash-erasure-code-shec/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/workloads/ec-rados-plugin=shec-k=4-m=3-c=2.yaml b/qa/suites/rados/thrash-erasure-code-shec/workloads/ec-rados-plugin=shec-k=4-m=3-c=2.yaml new file mode 120000 index 000000000..8f318cc33 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/workloads/ec-rados-plugin=shec-k=4-m=3-c=2.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/% b/qa/suites/rados/thrash-erasure-code/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/% diff --git a/qa/suites/rados/thrash-erasure-code/.qa b/qa/suites/rados/thrash-erasure-code/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/ceph.yaml b/qa/suites/rados/thrash-erasure-code/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/thrash-erasure-code/clusters b/qa/suites/rados/thrash-erasure-code/clusters new file mode 120000 index 000000000..7aac47be3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/clusters @@ -0,0 +1 @@ +../thrash/clusters
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/fast/.qa b/qa/suites/rados/thrash-erasure-code/fast/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/fast/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/fast/fast.yaml b/qa/suites/rados/thrash-erasure-code/fast/fast.yaml new file mode 100644 index 000000000..8ebfee0a9 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/fast/fast.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd pool default ec fast read: true diff --git a/qa/suites/rados/thrash-erasure-code/fast/normal.yaml b/qa/suites/rados/thrash-erasure-code/fast/normal.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/fast/normal.yaml diff --git a/qa/suites/rados/thrash-erasure-code/mon_election b/qa/suites/rados/thrash-erasure-code/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/msgr-failures b/qa/suites/rados/thrash-erasure-code/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/objectstore b/qa/suites/rados/thrash-erasure-code/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/rados.yaml b/qa/suites/rados/thrash-erasure-code/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/recovery-overrides b/qa/suites/rados/thrash-erasure-code/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/.qa b/qa/suites/rados/thrash-erasure-code/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/careful.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/careful.yaml new file mode 100644 index 000000000..0602f01ad --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/careful.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml new file mode 100644 index 000000000..989b83e8f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/fastread.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/fastread.yaml new file mode 100644 index 000000000..5fbb9504b --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/fastread.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + osd pool default ec fast read: true + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/minsize_recovery.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/minsize_recovery.yaml new file mode 100644 index 000000000..771d9a104 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/minsize_recovery.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_test_min_size: 3 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml new file mode 100644 index 000000000..f8c542323 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + min_in: 
4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml new file mode 100644 index 000000000..ed13bfd3e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 4 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/.qa b/qa/suites/rados/thrash-erasure-code/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=clay-k=4-m=2.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=clay-k=4-m=2.yaml new file mode 120000 index 000000000..08155ed6e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=clay-k=4-m=2.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 120000 index 000000000..af6d8042f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 000000000..cdf551995 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml new file mode 100644 index 000000000..3c2ff7af0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml @@ -0,0 +1,27 @@ +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml new file mode 100644 index 000000000..af0ac3931 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml @@ -0,0 +1,21 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + balanced_reads: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-fast-read.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-fast-read.yaml new file mode 100644 index 000000000..e732ec6fa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-fast-read.yaml @@ -0,0 +1,21 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + fast_read: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-many-deletes.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-many-deletes.yaml new file mode 100644 index 000000000..25b38e14f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-many-deletes.yaml @@ -0,0 +1,14 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 8 + objects: 20 + size: 16384 + ec_pool: true + op_weights: + write: 0 + read: 0 + append: 10 + delete: 20 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml new file mode 100644 index 000000000..a8ac39716 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml @@ -0,0 +1,20 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-old-clients/% b/qa/suites/rados/thrash-old-clients/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/% diff --git a/qa/suites/rados/thrash-old-clients/.qa b/qa/suites/rados/thrash-old-clients/.qa new file mode 120000 index 
000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-distro$/.qa b/qa/suites/rados/thrash-old-clients/0-distro$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-distro$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-distro$/centos_8.stream_container_tools.yaml b/qa/suites/rados/thrash-old-clients/0-distro$/centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-distro$/centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/.qa b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml new file mode 120000 index 000000000..5393a7554 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 000000000..5ff70eadf --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/1-install/.qa b/qa/suites/rados/thrash-old-clients/1-install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/1-install/nautilus-v1only.yaml b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v1only.yaml new file mode 100644 index 000000000..39c5eb4a1 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v1only.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + mon_bind_msgr2: false + log-ignorelist: + - \(MON_DOWN\) + conf: + global: + ms type: async + ms bind msgr2: false +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs diff --git a/qa/suites/rados/thrash-old-clients/1-install/nautilus-v2only.yaml b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v2only.yaml new file mode 100644 index 000000000..61337e0e6 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v2only.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) + conf: + global: + ms type: async + ms bind msgr2: true + ms bind msgr1: false +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs diff --git a/qa/suites/rados/thrash-old-clients/1-install/nautilus.yaml b/qa/suites/rados/thrash-old-clients/1-install/nautilus.yaml new file mode 100644 index 000000000..cd05b71f5 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/nautilus.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs diff --git a/qa/suites/rados/thrash-old-clients/1-install/octopus.yaml b/qa/suites/rados/thrash-old-clients/1-install/octopus.yaml new file mode 100644 index 000000000..39d1da232 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/octopus.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: octopus + exclude_packages: + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo + - ceph-volume diff --git a/qa/suites/rados/thrash-old-clients/1-install/pacific.yaml b/qa/suites/rados/thrash-old-clients/1-install/pacific.yaml new file mode 100644 index 000000000..a26629252 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/pacific.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: pacific + exclude_packages: + 
- ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo + - ceph-volume diff --git a/qa/suites/rados/thrash-old-clients/1-install/quincy.yaml b/qa/suites/rados/thrash-old-clients/1-install/quincy.yaml new file mode 100644 index 000000000..2ce960be0 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/quincy.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo + - ceph-volume diff --git a/qa/suites/rados/thrash-old-clients/backoff/.qa b/qa/suites/rados/thrash-old-clients/backoff/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/backoff/normal.yaml b/qa/suites/rados/thrash-old-clients/backoff/normal.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/normal.yaml diff --git a/qa/suites/rados/thrash-old-clients/backoff/peering.yaml b/qa/suites/rados/thrash-old-clients/backoff/peering.yaml new file mode 100644 index 000000000..66d06117e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/peering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true diff --git a/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml b/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml new file mode 100644 index 000000000..e61099065 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true diff --git a/qa/suites/rados/thrash-old-clients/ceph.yaml b/qa/suites/rados/thrash-old-clients/ceph.yaml new file mode 100644 index 000000000..016ce36da --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/ceph.yaml @@ -0,0 +1,7 @@ +# Don't verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mon: + auth allow insecure global id reclaim: true diff --git a/qa/suites/rados/thrash-old-clients/clusters/+ b/qa/suites/rados/thrash-old-clients/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/+ diff --git a/qa/suites/rados/thrash-old-clients/clusters/.qa b/qa/suites/rados/thrash-old-clients/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml b/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml new file mode 100644 index 000000000..b0f3b9b4d --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rados/thrash-old-clients/clusters/three-plus-one.yaml b/qa/suites/rados/thrash-old-clients/clusters/three-plus-one.yaml new file mode 100644 index 000000000..9af5382ea --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/three-plus-one.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mgr.x, osd.4, osd.5, osd.6, osd.7, client.1] +- [mon.c, osd.8, osd.9, osd.10, osd.11, client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/.qa b/qa/suites/rados/thrash-old-clients/d-balancer/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/d-balancer/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml b/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml new file mode 100644 index 000000000..aa867660d --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + mon.a: + - while ! ceph balancer status ; do sleep 1 ; done + - ceph balancer mode crush-compat + - ceph balancer on diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/on.yaml b/qa/suites/rados/thrash-old-clients/d-balancer/on.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/d-balancer/on.yaml diff --git a/qa/suites/rados/thrash-old-clients/mon_election b/qa/suites/rados/thrash-old-clients/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/.qa b/qa/suites/rados/thrash-old-clients/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml new file mode 100644 index 000000000..ec45f8882 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms tcp read timeout: 5 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml new file mode 100644 index 000000000..cc9a3ae69 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + osd: + osd heartbeat use min delay socket: true + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml new file mode 100644 index 000000000..d7cec6f36 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash-old-clients/rados.yaml b/qa/suites/rados/thrash-old-clients/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/thrashers/.qa b/qa/suites/rados/thrash-old-clients/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/thrashers/careful.yaml b/qa/suites/rados/thrash-old-clients/thrashers/careful.yaml new file mode 100644 index 000000000..8820a6cd2 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/careful.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-old-clients/thrashers/default.yaml b/qa/suites/rados/thrash-old-clients/thrashers/default.yaml new file mode 100644 index 000000000..54dc88802 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/default.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml b/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml new file mode 100644 index 000000000..c1ab4493e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd scrub during recovery: false + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 0.25 + chance_pgpnum_fix: 0.25 + chance_test_map_discontinuity: 2 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml new file mode 100644 index 000000000..bb65d6a60 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + journal throttle high multiple: 2 + journal throttle max multiple: 10 + filestore queue throttle high multiple: 2 + filestore queue throttle max multiple: 10 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 +openstack: +- volumes: + size: 50 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/none.yaml 
b/qa/suites/rados/thrash-old-clients/thrashers/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/none.yaml diff --git a/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml b/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml new file mode 100644 index 000000000..000550bd8 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml b/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/workloads/.qa b/qa/suites/rados/thrash-old-clients/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml b/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml new file mode 100644 index 000000000..33f667ffd --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml @@ -0,0 +1,34 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.2] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml b/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml new file mode 100644 index 000000000..6a89a4e6e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml @@ -0,0 +1,33 @@ +overrides: + ceph: + conf: + client.2: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 diff --git a/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml new file mode 100644 index 000000000..a8bbbafec --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml @@ -0,0 +1,7 @@ +meta: +- desc: | + rbd object class functional tests +tasks: +- exec: + client.2: + - ceph_test_cls_rbd --gtest_filter=-TestClsRbd.get_features:TestClsRbd.parents:TestClsRbd.mirror diff --git a/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml b/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..f0a5735a9 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.2] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash-old-clients/workloads/test_rbd_api.yaml b/qa/suites/rados/thrash-old-clients/workloads/test_rbd_api.yaml new file mode 100644 index 000000000..39617b37e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/test_rbd_api.yaml @@ -0,0 +1,8 @@ +meta: +- desc: | + librbd C and C++ api tests +workload: +- workunit: + clients: + client.2: + - rbd/test_librbd.sh diff --git a/qa/suites/rados/thrash/% b/qa/suites/rados/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/% diff --git 
a/qa/suites/rados/thrash/.qa b/qa/suites/rados/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/.qa b/qa/suites/rados/thrash/0-size-min-size-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml new file mode 120000 index 000000000..5393a7554 --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 000000000..5ff70eadf --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/.qa b/qa/suites/rados/thrash/1-pg-log-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/1-pg-log-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml b/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml b/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml new file mode 120000 index 000000000..abd86d7d9 --- /dev/null +++ b/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1 @@ +.qa/overrides/short_pg_log.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/2-recovery-overrides/$ b/qa/suites/rados/thrash/2-recovery-overrides/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/$ diff --git a/qa/suites/rados/thrash/2-recovery-overrides/.qa b/qa/suites/rados/thrash/2-recovery-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/2-recovery-overrides/default.yaml b/qa/suites/rados/thrash/2-recovery-overrides/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/default.yaml diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-active-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-active-recovery.yaml new file mode 120000 index 000000000..47afd7020 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-active-recovery.yaml @@ -0,0 +1 @@ +.qa/overrides/more-active-recovery.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml new file mode 100644 index 000000000..0bbc72db7 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-async-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-async-recovery.yaml new file mode 100644 index 000000000..4aed086bc --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-async-recovery.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-partial-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-partial-recovery.yaml new file mode 100644 index 000000000..88f15f2f6 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-partial-recovery.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/rados/thrash/3-scrub-overrides/$ b/qa/suites/rados/thrash/3-scrub-overrides/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/$ diff --git a/qa/suites/rados/thrash/3-scrub-overrides/.qa b/qa/suites/rados/thrash/3-scrub-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/3-scrub-overrides/default.yaml b/qa/suites/rados/thrash/3-scrub-overrides/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/default.yaml diff --git a/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-2.yaml b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-2.yaml new file mode 100644 index 000000000..abf852e98 --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-2.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd max scrubs: 2 diff --git a/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-3.yaml b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-3.yaml new file mode 100644 index 000000000..3b3dfd61f --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-3.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd max scrubs: 3 diff --git a/qa/suites/rados/thrash/backoff/.qa b/qa/suites/rados/thrash/backoff/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/backoff/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/backoff/normal.yaml b/qa/suites/rados/thrash/backoff/normal.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/backoff/normal.yaml diff --git a/qa/suites/rados/thrash/backoff/peering.yaml b/qa/suites/rados/thrash/backoff/peering.yaml new file mode 100644 index 000000000..66d06117e --- /dev/null +++ b/qa/suites/rados/thrash/backoff/peering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true diff --git a/qa/suites/rados/thrash/backoff/peering_and_degraded.yaml b/qa/suites/rados/thrash/backoff/peering_and_degraded.yaml new file mode 100644 index 000000000..e61099065 --- /dev/null +++ b/qa/suites/rados/thrash/backoff/peering_and_degraded.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true diff --git a/qa/suites/rados/thrash/ceph.yaml b/qa/suites/rados/thrash/ceph.yaml new file mode 100644 index 000000000..67393c564 --- /dev/null +++ b/qa/suites/rados/thrash/ceph.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 diff --git a/qa/suites/rados/thrash/clusters/+ b/qa/suites/rados/thrash/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/clusters/+ diff --git a/qa/suites/rados/thrash/clusters/.qa b/qa/suites/rados/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/clusters/fixed-2.yaml b/qa/suites/rados/thrash/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rados/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/clusters/openstack.yaml b/qa/suites/rados/thrash/clusters/openstack.yaml new file mode 100644 index 000000000..b0f3b9b4d --- /dev/null +++ b/qa/suites/rados/thrash/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rados/thrash/crc-failures/.qa b/qa/suites/rados/thrash/crc-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/crc-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml b/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml new file mode 100644 index 000000000..5bbb4385e --- /dev/null +++ b/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + osd inject bad map crc probability: 0.1 + log-ignorelist: + - failed to encode map diff --git a/qa/suites/rados/thrash/crc-failures/default.yaml b/qa/suites/rados/thrash/crc-failures/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/crc-failures/default.yaml diff --git a/qa/suites/rados/thrash/d-balancer/.qa b/qa/suites/rados/thrash/d-balancer/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/d-balancer/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/d-balancer/crush-compat.yaml b/qa/suites/rados/thrash/d-balancer/crush-compat.yaml new file mode 100644 index 000000000..aa867660d --- /dev/null +++ b/qa/suites/rados/thrash/d-balancer/crush-compat.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + mon.a: + - while ! ceph balancer status ; do sleep 1 ; done + - ceph balancer mode crush-compat + - ceph balancer on diff --git a/qa/suites/rados/thrash/d-balancer/on.yaml b/qa/suites/rados/thrash/d-balancer/on.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/d-balancer/on.yaml diff --git a/qa/suites/rados/thrash/mon_election b/qa/suites/rados/thrash/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash/msgr b/qa/suites/rados/thrash/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/thrash/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/thrash/msgr-failures/.qa b/qa/suites/rados/thrash/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/msgr-failures/fastclose.yaml b/qa/suites/rados/thrash/msgr-failures/fastclose.yaml new file mode 100644 index 000000000..ec45f8882 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/fastclose.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms tcp read timeout: 5 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash/msgr-failures/few.yaml b/qa/suites/rados/thrash/msgr-failures/few.yaml new file mode 100644 index 000000000..cc9a3ae69 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/few.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + osd: + osd heartbeat use min delay socket: true + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml b/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml new file mode 100644 index 000000000..d7cec6f36 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash/msgr-failures/osd-dispatch-delay.yaml b/qa/suites/rados/thrash/msgr-failures/osd-dispatch-delay.yaml new file mode 100644 index 000000000..aff059fb8 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/osd-dispatch-delay.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + osd debug inject dispatch delay duration: 0.1 + osd debug inject dispatch delay probability: 0.1 + diff --git a/qa/suites/rados/thrash/objectstore b/qa/suites/rados/thrash/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash/rados.yaml b/qa/suites/rados/thrash/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/supported-random-distro$ b/qa/suites/rados/thrash/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash/thrashers/.qa b/qa/suites/rados/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/thrashers/careful.yaml b/qa/suites/rados/thrash/thrashers/careful.yaml new file mode 100644 index 000000000..8190657f3 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/careful.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash/thrashers/default.yaml b/qa/suites/rados/thrash/thrashers/default.yaml new file mode 100644 index 000000000..5a300a9ff --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/default.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash/thrashers/mapgap.yaml b/qa/suites/rados/thrash/thrashers/mapgap.yaml new file mode 100644 index 000000000..3b34f5b6b --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/mapgap.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd scrub during recovery: false + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 0.25 + chance_pgnum_shrink: 0.25 + chance_pgpnum_fix: 0.25 + chance_test_map_discontinuity: 2 diff --git a/qa/suites/rados/thrash/thrashers/morepggrow.yaml b/qa/suites/rados/thrash/thrashers/morepggrow.yaml new file mode 100644 index 000000000..f18a88711 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/morepggrow.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + journal throttle high multiple: 2 + journal throttle max multiple: 10 + filestore queue throttle high multiple: 2 + filestore queue throttle max multiple: 10 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 +openstack: +- volumes: + size: 50 diff --git a/qa/suites/rados/thrash/thrashers/none.yaml b/qa/suites/rados/thrash/thrashers/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null 
+++ b/qa/suites/rados/thrash/thrashers/none.yaml diff --git a/qa/suites/rados/thrash/thrashers/pggrow.yaml b/qa/suites/rados/thrash/thrashers/pggrow.yaml new file mode 100644 index 000000000..54498d0cf --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/pggrow.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/thrashosds-health.yaml b/qa/suites/rados/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
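The thrasher fragments above (careful, default, mapgap, morepggrow, pggrow) all wrap the same thrashosds task; they differ mainly in how aggressively PGs are resized and how recovery is throttled. A minimal sketch of the shape they share follows — the keys are the ones used above, while the comments are an interpretive reading rather than text from the files:

    overrides:
      ceph:
        conf:
          osd:
            osd max backfills: 3             # cap concurrent backfill reservations per OSD
            osd scrub min interval: 60       # scrub aggressively while thrashing
            osd scrub max interval: 120
          mon:
            mon min osdmap epochs: 50
            # prune full osdmaps regularly
            mon osdmap full prune min: 15
            mon osdmap full prune interval: 2
            mon osdmap full prune txsize: 2
    tasks:
    - thrashosds:
        timeout: 1200               # how long to wait for the cluster to recover between operations
        chance_pgnum_grow: 1        # relative odds of growing pg_num on a test pool
        chance_pgnum_shrink: 1      # ... of shrinking it (some fragments omit or reweight these)
        chance_pgpnum_fix: 1        # ... of realigning pgp_num with pg_num

none.yaml is intentionally empty, giving a no-thrashing baseline inside the same matrix.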
\ No newline at end of file diff --git a/qa/suites/rados/thrash/workloads/.qa b/qa/suites/rados/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml b/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml new file mode 100644 index 000000000..6885f72aa --- /dev/null +++ b/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- radosbench: + clients: [client.0] + time: 150 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/rados/thrash/workloads/cache-agent-big.yaml b/qa/suites/rados/thrash/workloads/cache-agent-big.yaml new file mode 100644 index 000000000..9ca2576d4 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-agent-big.yaml @@ -0,0 +1,37 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd erasure-code-profile set myprofile crush-failure-domain=osd m=2 k=2 + - sudo ceph osd pool create base 4 4 erasure myprofile + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool set base min_size 2 + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 60 + - sudo ceph osd pool set cache target_max_objects 5000 +- rados: + clients: [client.0] + pools: [base] + ops: 10000 + objects: 6600 + max_seconds: 1200 + size: 1024 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-agent-small.yaml b/qa/suites/rados/thrash/workloads/cache-agent-small.yaml new file mode 100644 index 000000000..108009e3b --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-agent-small.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 60 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 + - sudo ceph osd pool set cache min_write_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-pool-snaps-readproxy.yaml b/qa/suites/rados/thrash/workloads/cache-pool-snaps-readproxy.yaml new file mode 100644 index 000000000..f864e1170 --- /dev/null +++ 
b/qa/suites/rados/thrash/workloads/cache-pool-snaps-readproxy.yaml @@ -0,0 +1,40 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache readproxy + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml b/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml new file mode 100644 index 000000000..6bf97c692 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml @@ -0,0 +1,45 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 0 + - sudo ceph osd pool set cache min_write_recency_for_promote 0 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +openstack: + - machine: + ram: 15000 # MB diff --git a/qa/suites/rados/thrash/workloads/cache-snaps-balanced.yaml b/qa/suites/rados/thrash/workloads/cache-snaps-balanced.yaml new file mode 100644 index 000000000..574a1f753 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-snaps-balanced.yaml @@ -0,0 +1,41 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set 
cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-snaps.yaml b/qa/suites/rados/thrash/workloads/cache-snaps.yaml new file mode 100644 index 000000000..6d11f4cf1 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-snaps.yaml @@ -0,0 +1,40 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache.yaml b/qa/suites/rados/thrash/workloads/cache.yaml new file mode 100644 index 000000000..bd9daac7a --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache.yaml @@ -0,0 +1,37 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache min_read_recency_for_promote 0 + - sudo ceph osd pool set cache min_write_recency_for_promote 0 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 diff --git a/qa/suites/rados/thrash/workloads/dedup-io-mixed.yaml b/qa/suites/rados/thrash/workloads/dedup-io-mixed.yaml new file mode 100644 index 000000000..7758525a6 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/dedup-io-mixed.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 1500 + objects: 50 + set_chunk: true + enable_dedup: true + 
dedup_chunk_size: '131072' + dedup_chunk_algo: 'fastcdc' + op_weights: + read: 100 + write: 50 + set_chunk: 30 + tier_promote: 10 + tier_flush: 5 + tier_evict: 10 diff --git a/qa/suites/rados/thrash/workloads/dedup-io-snaps.yaml b/qa/suites/rados/thrash/workloads/dedup-io-snaps.yaml new file mode 100644 index 000000000..3d2ce3026 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/dedup-io-snaps.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 1500 + objects: 50 + set_chunk: true + enable_dedup: true + dedup_chunk_size: '131072' + dedup_chunk_algo: 'fastcdc' + op_weights: + read: 100 + write: 50 + set_chunk: 30 + tier_promote: 10 + tier_flush: 5 + tier_evict: 10 + snap_create: 10 + snap_remove: 10 + rollback: 10 diff --git a/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml b/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml new file mode 100644 index 000000000..f60afb809 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml @@ -0,0 +1,21 @@ +override: + conf: + osd: + osd deep scrub update digest min age: 0 + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/rados_api_tests.yaml b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml new file mode 100644 index 000000000..3e72897ae --- /dev/null +++ b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + crush_tunables: jewel + conf: + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + mon: + mon warn on pool no app: false + debug mgrc: 20 + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/thrash/workloads/radosbench-high-concurrency.yaml b/qa/suites/rados/thrash/workloads/radosbench-high-concurrency.yaml new file mode 100644 index 000000000..afdb3794d --- /dev/null +++ b/qa/suites/rados/thrash/workloads/radosbench-high-concurrency.yaml @@ -0,0 +1,51 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 diff --git a/qa/suites/rados/thrash/workloads/radosbench.yaml b/qa/suites/rados/thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..32efe0ba9 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/radosbench.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + 
client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 diff --git a/qa/suites/rados/thrash/workloads/redirect.yaml b/qa/suites/rados/thrash/workloads/redirect.yaml new file mode 100644 index 000000000..14cce6643 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/redirect.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 500 + set_redirect: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/redirect_promote_tests.yaml b/qa/suites/rados/thrash/workloads/redirect_promote_tests.yaml new file mode 100644 index 000000000..23226771d --- /dev/null +++ b/qa/suites/rados/thrash/workloads/redirect_promote_tests.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 500 + set_redirect: true + op_weights: + set_redirect: 100 + read: 50 + tier_promote: 30 diff --git a/qa/suites/rados/thrash/workloads/redirect_set_object.yaml b/qa/suites/rados/thrash/workloads/redirect_set_object.yaml new file mode 100644 index 000000000..7fe81435c --- /dev/null +++ b/qa/suites/rados/thrash/workloads/redirect_set_object.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 500 + set_redirect: true + op_weights: + set_redirect: 100 + copy_from: 100 diff --git a/qa/suites/rados/thrash/workloads/set-chunks-read.yaml b/qa/suites/rados/thrash/workloads/set-chunks-read.yaml new file mode 100644 index 000000000..fa6d6a8b8 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/set-chunks-read.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 300 + set_chunk: true + op_weights: + chunk_read: 100 + tier_promote: 10 diff --git a/qa/suites/rados/thrash/workloads/small-objects-balanced.yaml b/qa/suites/rados/thrash/workloads/small-objects-balanced.yaml new file mode 100644 index 000000000..ece22cd36 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/small-objects-balanced.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + crush_tunables: jewel + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/workloads/small-objects-localized.yaml b/qa/suites/rados/thrash/workloads/small-objects-localized.yaml new file mode 100644 index 000000000..ad791ee11 --- /dev/null +++ 
b/qa/suites/rados/thrash/workloads/small-objects-localized.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + crush_tunables: jewel + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/workloads/small-objects.yaml b/qa/suites/rados/thrash/workloads/small-objects.yaml new file mode 100644 index 000000000..6f9edfae8 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/small-objects.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + crush_tunables: jewel + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/workloads/snaps-few-objects-balanced.yaml b/qa/suites/rados/thrash/workloads/snaps-few-objects-balanced.yaml new file mode 100644 index 000000000..ffb6cbc8b --- /dev/null +++ b/qa/suites/rados/thrash/workloads/snaps-few-objects-balanced.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/snaps-few-objects-localized.yaml b/qa/suites/rados/thrash/workloads/snaps-few-objects-localized.yaml new file mode 100644 index 000000000..eca004716 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/snaps-few-objects-localized.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml b/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..955327b29 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml b/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml new file mode 100644 index 000000000..182fc1431 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_fadvise_dontneed: true + op_weights: + write: 100 diff --git a/qa/suites/rados/upgrade/.qa b/qa/suites/rados/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
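The cache-agent-* and cache-* workloads a few hunks above all build a cache tier in front of a 'base' pool with the same ceph command sequence before driving rados I/O at it. Condensed here with interpretive annotations; the commands are the ones used above, but the exact hit_set_period and target_max_objects values vary per file:

    tasks:
    - exec:
        client.0:
        - sudo ceph osd pool create base 4                      # backing data pool
        - sudo ceph osd pool application enable base rados
        - sudo ceph osd pool create cache 4                     # pool that will serve as the cache tier
        - sudo ceph osd tier add base cache                     # attach 'cache' as a tier of 'base'
        - sudo ceph osd tier cache-mode cache writeback         # readproxy in the -readproxy variant
        - sudo ceph osd tier set-overlay base cache             # route client I/O for 'base' through 'cache'
        - sudo ceph osd pool set cache hit_set_type bloom       # track recent accesses for promotion decisions
        - sudo ceph osd pool set cache hit_set_count 8
        - sudo ceph osd pool set cache hit_set_period 3600      # 60 in the cache-agent variants
        - sudo ceph osd pool set cache target_max_objects 250   # small cap so the tier agent flushes/evicts constantly

Where min_read_recency_for_promote / min_write_recency_for_promote are set, an object must appear in that many recent hit sets before it is promoted into the cache pool.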
\ No newline at end of file diff --git a/qa/suites/rados/upgrade/parallel b/qa/suites/rados/upgrade/parallel new file mode 120000 index 000000000..84b63d6a5 --- /dev/null +++ b/qa/suites/rados/upgrade/parallel @@ -0,0 +1 @@ +../../upgrade/quincy-x/parallel/
\ No newline at end of file diff --git a/qa/suites/rados/valgrind-leaks/% b/qa/suites/rados/valgrind-leaks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/% diff --git a/qa/suites/rados/valgrind-leaks/.qa b/qa/suites/rados/valgrind-leaks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/valgrind-leaks/1-start.yaml b/qa/suites/rados/valgrind-leaks/1-start.yaml new file mode 100644 index 000000000..1cdd8a688 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/1-start.yaml @@ -0,0 +1,31 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB + +overrides: + install: + ceph: + debuginfo: true + ceph: + log-ignorelist: + - overall HEALTH_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + osd heartbeat grace: 40 + osd max object name len: 460 + osd max object namespace len: 64 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: [--tool=memcheck] +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/.qa b/qa/suites/rados/valgrind-leaks/2-inject-leak/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/mon.yaml b/qa/suites/rados/valgrind-leaks/2-inject-leak/mon.yaml new file mode 100644 index 000000000..695a9f8af --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/mon.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + expect_valgrind_errors: true +tasks: +- exec: + mon.a: + - ceph tell mon.a leak_some_memory diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/none.yaml b/qa/suites/rados/valgrind-leaks/2-inject-leak/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/none.yaml diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/osd.yaml b/qa/suites/rados/valgrind-leaks/2-inject-leak/osd.yaml new file mode 100644 index 000000000..f249f16e1 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/osd.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + expect_valgrind_errors: true +tasks: +- exec: + mon.a: + - ceph tell osd.0 leak_some_memory diff --git a/qa/suites/rados/valgrind-leaks/centos_latest.yaml b/qa/suites/rados/valgrind-leaks/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
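The valgrind-leaks suite is a '%' matrix: 1-start.yaml always runs the daemons under valgrind memcheck, and exactly one 2-inject-leak fragment (mon, osd or none) is layered on top. Roughly, the combined job looks like the sketch below; this is an illustration of how the fragments compose, not a file from the tree:

    overrides:
      ceph:
        expect_valgrind_errors: true   # from 2-inject-leak/mon.yaml or osd.yaml; absent with none.yaml
        valgrind:
          mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
          osd: [--tool=memcheck]
    tasks:
    - install:
    - ceph:
    - exec:
        mon.a:
        - ceph tell mon.a leak_some_memory   # deliberately leak so memcheck has something to report

With none.yaml there is no injection and any reported leak fails the job; with mon.yaml or osd.yaml, expect_valgrind_errors: true inverts that, so the job fails unless memcheck flags the deliberately leaked memory.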
\ No newline at end of file diff --git a/qa/suites/rados/verify/% b/qa/suites/rados/verify/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/% diff --git a/qa/suites/rados/verify/.qa b/qa/suites/rados/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/centos_latest.yaml b/qa/suites/rados/verify/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rados/verify/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/ceph.yaml b/qa/suites/rados/verify/ceph.yaml new file mode 100644 index 000000000..fc5ce350a --- /dev/null +++ b/qa/suites/rados/verify/ceph.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + debug monc: 20 +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/verify/clusters/+ b/qa/suites/rados/verify/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/clusters/+ diff --git a/qa/suites/rados/verify/clusters/.qa b/qa/suites/rados/verify/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/clusters/fixed-2.yaml b/qa/suites/rados/verify/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rados/verify/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/clusters/openstack.yaml b/qa/suites/rados/verify/clusters/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/verify/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/verify/d-thrash/.qa b/qa/suites/rados/verify/d-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/d-thrash/default/+ b/qa/suites/rados/verify/d-thrash/default/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/+ diff --git a/qa/suites/rados/verify/d-thrash/default/.qa b/qa/suites/rados/verify/d-thrash/default/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/d-thrash/default/default.yaml b/qa/suites/rados/verify/d-thrash/default/default.yaml new file mode 100644 index 000000000..41b35926f --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/default.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml b/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/d-thrash/none.yaml b/qa/suites/rados/verify/d-thrash/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/none.yaml diff --git a/qa/suites/rados/verify/mon_election b/qa/suites/rados/verify/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/verify/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/verify/msgr b/qa/suites/rados/verify/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/verify/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/verify/msgr-failures/.qa b/qa/suites/rados/verify/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/msgr-failures/few.yaml b/qa/suites/rados/verify/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/verify/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/verify/objectstore b/qa/suites/rados/verify/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/verify/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
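rados/verify (and several rbd suites later in this diff) reuse the same msgr-failures/few.yaml fragment. A commonly used reading of its settings is kept as comments in this sketch rather than asserted as authoritative:

    overrides:
      ceph:
        conf:
          global:
            ms inject socket failures: 5000        # roughly one injected socket failure per 5000 messenger events
            mon client directed command retry: 5   # retry directed mon commands dropped by those failures
        log-ignorelist:
        - \(OSD_SLOW_PING_TIME                     # heartbeats get slow or lost while sockets are being killed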
\ No newline at end of file diff --git a/qa/suites/rados/verify/rados.yaml b/qa/suites/rados/verify/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/verify/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/tasks/.qa b/qa/suites/rados/verify/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/tasks/mon_recovery.yaml b/qa/suites/rados/verify/tasks/mon_recovery.yaml new file mode 100644 index 000000000..06d9602e6 --- /dev/null +++ b/qa/suites/rados/verify/tasks/mon_recovery.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(OSDMAP_FLAGS\) + - \(SMALLER_PGP_NUM\) + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW OPS\) + - slow request +tasks: +- mon_recovery: diff --git a/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/qa/suites/rados/verify/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..e5a54e69e --- /dev/null +++ b/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -0,0 +1,34 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(SLOW_OPS\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(OBJECT_MISPLACED\) + - slow request + conf: + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + debug monc: 20 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" + osd client watch timeout: 120 +tasks: +- workunit: + timeout: 6h + env: + ALLOW_TIMEOUTS: "1" + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/verify/tasks/rados_cls_all.yaml b/qa/suites/rados/verify/tasks/rados_cls_all.yaml new file mode 100644 index 000000000..0236326f3 --- /dev/null +++ b/qa/suites/rados/verify/tasks/rados_cls_all.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + osd: + osd_class_load_list: "*" + osd_class_default_list: "*" + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/rados/verify/validater/.qa b/qa/suites/rados/verify/validater/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/validater/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/validater/lockdep.yaml b/qa/suites/rados/verify/validater/lockdep.yaml new file mode 100644 index 000000000..25f84355c --- /dev/null +++ b/qa/suites/rados/verify/validater/lockdep.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + lockdep: true diff --git a/qa/suites/rados/verify/validater/valgrind.yaml b/qa/suites/rados/verify/validater/valgrind.yaml new file mode 100644 index 000000000..03accceaf --- /dev/null +++ b/qa/suites/rados/verify/validater/valgrind.yaml @@ -0,0 +1,31 @@ +# see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126 +os_type: centos + +overrides: + install: + ceph: + debuginfo: true + ceph: + conf: + global: + osd heartbeat grace: 80 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + debug bluestore: 1 + debug bluefs: 1 + log-ignorelist: + - overall HEALTH_ +# valgrind is slow.. we might get PGs stuck peering etc + - \(PG_ +# mons sometimes are left off of initial quorum due to valgrind slowness. ok to ignore here because we'll still catch an actual crash due to the core + - \(MON_DOWN\) + - \(SLOW_OPS\) + - slow request + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: [--tool=memcheck] + mds: [--tool=memcheck] +# https://tracker.ceph.com/issues/38621 +# mgr: [--tool=memcheck] diff --git a/qa/suites/rbd/.qa b/qa/suites/rbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/% b/qa/suites/rbd/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/basic/% diff --git a/qa/suites/rbd/basic/.qa b/qa/suites/rbd/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/base/.qa b/qa/suites/rbd/basic/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/base/install.yaml b/qa/suites/rbd/basic/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/basic/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/basic/clusters/+ b/qa/suites/rbd/basic/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/basic/clusters/+ diff --git a/qa/suites/rbd/basic/clusters/.qa b/qa/suites/rbd/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/clusters/fixed-1.yaml b/qa/suites/rbd/basic/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/rbd/basic/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/basic/clusters/openstack.yaml b/qa/suites/rbd/basic/clusters/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rbd/basic/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/basic/conf b/qa/suites/rbd/basic/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/basic/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/basic/msgr-failures/.qa b/qa/suites/rbd/basic/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/msgr-failures/few.yaml b/qa/suites/rbd/basic/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/basic/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/basic/objectstore b/qa/suites/rbd/basic/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/basic/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/basic/supported-random-distro$ b/qa/suites/rbd/basic/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/basic/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/basic/tasks/.qa b/qa/suites/rbd/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml new file mode 100644 index 000000000..5bb734d24 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml b/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml new file mode 100644 index 000000000..51b35e2e1 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh + - cls/test_cls_lock.sh + - cls/test_cls_journal.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml b/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml new file mode 100644 index 000000000..d2c80ad65 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_lock_fence.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml new file mode 100644 index 000000000..eae484a97 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - \(SLOW_OPS\) + - slow request + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/rbd/cli/% b/qa/suites/rbd/cli/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/cli/% diff --git a/qa/suites/rbd/cli/.qa b/qa/suites/rbd/cli/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/.qa @@ -0,0 +1 @@ +../.qa/
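Most task fragments in rbd/basic are one-line workunit invocations. As used throughout this tree, the workunit task runs the named script from qa/workunits/ in a checkout on the listed client, so a fragment shaped like the sketch below runs qa/workunits/rbd/test_librbd.sh on client.0 (the comments are explanatory, not from the files):

    tasks:
    - workunit:
        clients:
          client.0:                 # role that executes the script
          - rbd/test_librbd.sh      # path relative to qa/workunits in the checked-out ceph repo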
\ No newline at end of file diff --git a/qa/suites/rbd/cli/base/.qa b/qa/suites/rbd/cli/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/base/install.yaml b/qa/suites/rbd/cli/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/cli/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/cli/clusters b/qa/suites/rbd/cli/clusters new file mode 120000 index 000000000..ae92569e8 --- /dev/null +++ b/qa/suites/rbd/cli/clusters @@ -0,0 +1 @@ +../basic/clusters
\ No newline at end of file diff --git a/qa/suites/rbd/cli/conf b/qa/suites/rbd/cli/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/cli/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/cli/data-pool b/qa/suites/rbd/cli/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/cli/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/features/.qa b/qa/suites/rbd/cli/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/features/defaults.yaml b/qa/suites/rbd/cli/features/defaults.yaml new file mode 100644 index 000000000..75afd68dd --- /dev/null +++ b/qa/suites/rbd/cli/features/defaults.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 61 diff --git a/qa/suites/rbd/cli/features/journaling.yaml b/qa/suites/rbd/cli/features/journaling.yaml new file mode 100644 index 000000000..6cea62a88 --- /dev/null +++ b/qa/suites/rbd/cli/features/journaling.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 125 diff --git a/qa/suites/rbd/cli/features/layering.yaml b/qa/suites/rbd/cli/features/layering.yaml new file mode 100644 index 000000000..429b8e145 --- /dev/null +++ b/qa/suites/rbd/cli/features/layering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 1 diff --git a/qa/suites/rbd/cli/msgr-failures/.qa b/qa/suites/rbd/cli/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
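The rbd/cli features fragments pick image features through the numeric 'rbd default features' bitmask. Decomposed into the standard RBD feature bits (a reading of the values used above, not text from the files):

    overrides:
      ceph:
        conf:
          client:
            # layering=1, exclusive-lock=4, object-map=8, fast-diff=16, deep-flatten=32, journaling=64
            rbd default features: 61     # defaults.yaml: 1+4+8+16+32 (everything above except journaling)
            # journaling.yaml uses 125 = 61+64 (adds journaling); layering.yaml uses 1 (layering only)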
\ No newline at end of file diff --git a/qa/suites/rbd/cli/msgr-failures/few.yaml b/qa/suites/rbd/cli/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/cli/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/cli/objectstore b/qa/suites/rbd/cli/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/cli/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/cli/supported-random-distro$ b/qa/suites/rbd/cli/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/cli/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/cli/workloads/.qa b/qa/suites/rbd/cli/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_generic.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_generic.yaml new file mode 100644 index 000000000..be43b3e8d --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_generic.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/cli_generic.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_groups.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_groups.yaml new file mode 100644 index 000000000..6ff836342 --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_groups.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/rbd_groups.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml new file mode 100644 index 000000000..b08f2612f --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_luks_encryption.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_luks_encryption.yaml new file mode 100644 index 000000000..bb5d1608e --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_luks_encryption.yaml @@ -0,0 +1,9 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- workunit: + clients: + client.0: + - rbd/luks-encryption.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml new file mode 100644 index 000000000..b04ac08f7 --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/cli_migration.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_support_module_recovery.yaml b/qa/suites/rbd/cli/workloads/rbd_support_module_recovery.yaml new file mode 100644 index 000000000..aa4d0001f --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_support_module_recovery.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + mgr: + debug rbd: 20 +tasks: +- install: + extra_system_packages: + - fio +- workunit: + clients: + client.0: + - rbd/rbd_support_module_recovery.sh diff --git a/qa/suites/rbd/cli_v1/% b/qa/suites/rbd/cli_v1/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/cli_v1/% diff --git a/qa/suites/rbd/cli_v1/.qa b/qa/suites/rbd/cli_v1/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/.qa @@ -0,0 +1 @@ +../.qa/
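Two of the cli workloads above pull in extra software at install time through different knobs: extra_packages under the install override appears to request packages built alongside Ceph (rbd-nbd for the LUKS workunit), while extra_system_packages in the install task pulls ordinary distro packages (fio for the rbd_support_module_recovery workunit). A juxtaposed sketch, not a file from the tree:

    # rbd_cli_luks_encryption.yaml: package built with Ceph, via the install override
    overrides:
      install:
        ceph:
          extra_packages: [rbd-nbd]
    # rbd_support_module_recovery.yaml: distro package, via the install task itself
    tasks:
    - install:
        extra_system_packages:
        - fio
    - workunit:
        clients:
          client.0:
          - rbd/rbd_support_module_recovery.sh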
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/base/.qa b/qa/suites/rbd/cli_v1/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/base/install.yaml b/qa/suites/rbd/cli_v1/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/cli_v1/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/cli_v1/clusters b/qa/suites/rbd/cli_v1/clusters new file mode 120000 index 000000000..ae92569e8 --- /dev/null +++ b/qa/suites/rbd/cli_v1/clusters @@ -0,0 +1 @@ +../basic/clusters
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/conf b/qa/suites/rbd/cli_v1/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/cli_v1/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/features/.qa b/qa/suites/rbd/cli_v1/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/features/format-1.yaml b/qa/suites/rbd/cli_v1/features/format-1.yaml new file mode 100644 index 000000000..9c5320835 --- /dev/null +++ b/qa/suites/rbd/cli_v1/features/format-1.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default format: 1 diff --git a/qa/suites/rbd/cli_v1/msgr-failures/.qa b/qa/suites/rbd/cli_v1/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/msgr-failures/few.yaml b/qa/suites/rbd/cli_v1/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/cli_v1/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/cli_v1/objectstore b/qa/suites/rbd/cli_v1/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/cli_v1/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/supported-random-distro$ b/qa/suites/rbd/cli_v1/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/cli_v1/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/workloads/.qa b/qa/suites/rbd/cli_v1/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/workloads/rbd_cli_generic.yaml b/qa/suites/rbd/cli_v1/workloads/rbd_cli_generic.yaml new file mode 100644 index 000000000..be43b3e8d --- /dev/null +++ b/qa/suites/rbd/cli_v1/workloads/rbd_cli_generic.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/cli_generic.sh diff --git a/qa/suites/rbd/cli_v1/workloads/rbd_cli_import_export.yaml b/qa/suites/rbd/cli_v1/workloads/rbd_cli_import_export.yaml new file mode 100644 index 000000000..b08f2612f --- /dev/null +++ b/qa/suites/rbd/cli_v1/workloads/rbd_cli_import_export.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/rbd/encryption/% b/qa/suites/rbd/encryption/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/encryption/% diff --git a/qa/suites/rbd/encryption/.qa b/qa/suites/rbd/encryption/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/cache/.qa b/qa/suites/rbd/encryption/cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/cache/none.yaml b/qa/suites/rbd/encryption/cache/none.yaml new file mode 100644 index 000000000..42fd9c955 --- /dev/null +++ b/qa/suites/rbd/encryption/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/encryption/cache/writearound.yaml b/qa/suites/rbd/encryption/cache/writearound.yaml new file mode 100644 index 000000000..b6f8e319b --- /dev/null +++ b/qa/suites/rbd/encryption/cache/writearound.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/encryption/cache/writeback.yaml b/qa/suites/rbd/encryption/cache/writeback.yaml new file mode 100644 index 000000000..a55ec1df0 --- /dev/null +++ b/qa/suites/rbd/encryption/cache/writeback.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/encryption/cache/writethrough.yaml b/qa/suites/rbd/encryption/cache/writethrough.yaml new file mode 100644 index 000000000..6dc29e16c --- /dev/null +++ b/qa/suites/rbd/encryption/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/encryption/clusters/+ b/qa/suites/rbd/encryption/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/+ diff --git a/qa/suites/rbd/encryption/clusters/.qa b/qa/suites/rbd/encryption/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/.qa @@ -0,0 +1 @@ +../.qa/
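The rbd/encryption cache facet toggles the librbd client-side cache: none.yaml disables it, writearound.yaml and writeback.yaml select a cache policy, and writethrough.yaml allows no dirty data, which effectively makes the cache write-through. One variant is chosen per job; a combined, commented sketch:

    tasks:
    - install:
    - ceph:
        conf:
          client:
            rbd cache: true               # false in none.yaml
            rbd cache policy: writeback   # writearound.yaml picks writearound instead
            # writethrough.yaml sets no policy and instead uses:
            # rbd cache max dirty: 0      # no dirty data allowed, i.e. effectively write-through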
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/clusters/fixed-3.yaml b/qa/suites/rbd/encryption/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/clusters/openstack.yaml b/qa/suites/rbd/encryption/clusters/openstack.yaml new file mode 100644 index 000000000..9c39c7e5f --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 30000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/encryption/conf b/qa/suites/rbd/encryption/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/encryption/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/data-pool b/qa/suites/rbd/encryption/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/encryption/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/features/.qa b/qa/suites/rbd/encryption/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/features/defaults.yaml b/qa/suites/rbd/encryption/features/defaults.yaml new file mode 100644 index 000000000..75afd68dd --- /dev/null +++ b/qa/suites/rbd/encryption/features/defaults.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 61 diff --git a/qa/suites/rbd/encryption/msgr-failures/.qa b/qa/suites/rbd/encryption/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/msgr-failures/few.yaml b/qa/suites/rbd/encryption/msgr-failures/few.yaml new file mode 100644 index 000000000..ca8e09853 --- /dev/null +++ b/qa/suites/rbd/encryption/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - but it is still running + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/encryption/objectstore b/qa/suites/rbd/encryption/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/encryption/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/supported-random-distro$ b/qa/suites/rbd/encryption/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/encryption/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/workloads/.qa b/qa/suites/rbd/encryption/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1.yaml new file mode 100644 index 000000000..cb3659f97 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks1.yaml new file mode 100644 index 000000000..1db50d600 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks1.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks1 + encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks2.yaml new file mode 100644 index 000000000..a8ef5f2dd --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks2.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks1 + encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2.yaml new file mode 100644 index 000000000..203372d60 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks1.yaml new file mode 100644 index 000000000..727e5c32a --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks1.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks2 + encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks2.yaml new file mode 100644 index 000000000..43ded129f --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks2.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks2 + encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks1.yaml new file mode 100644 index 000000000..7f3f3776f --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks1.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + 
encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks2.yaml new file mode 100644 index 000000000..c9d9829a9 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks2.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/immutable-object-cache/% b/qa/suites/rbd/immutable-object-cache/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/% diff --git a/qa/suites/rbd/immutable-object-cache/.qa b/qa/suites/rbd/immutable-object-cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/clusters/+ b/qa/suites/rbd/immutable-object-cache/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/+ diff --git a/qa/suites/rbd/immutable-object-cache/clusters/.qa b/qa/suites/rbd/immutable-object-cache/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/clusters/fix-2.yaml b/qa/suites/rbd/immutable-object-cache/clusters/fix-2.yaml new file mode 100644 index 000000000..dbccecbce --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/fix-2.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1] +- [mon.b, mgr.y, osd.2, osd.3, client.0] diff --git a/qa/suites/rbd/immutable-object-cache/clusters/openstack.yaml b/qa/suites/rbd/immutable-object-cache/clusters/openstack.yaml new file mode 100644 index 000000000..b113e4f2e --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rbd/immutable-object-cache/conf b/qa/suites/rbd/immutable-object-cache/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/pool/.qa b/qa/suites/rbd/immutable-object-cache/pool/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/pool/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/pool/ceph_and_immutable_object_cache.yaml b/qa/suites/rbd/immutable-object-cache/pool/ceph_and_immutable_object_cache.yaml new file mode 100644 index 000000000..e977c1ba6 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/pool/ceph_and_immutable_object_cache.yaml @@ -0,0 +1,12 @@ +tasks: +- install: + extra_packages: ['ceph-immutable-object-cache'] +- ceph: + conf: + client: + rbd parent cache enabled: true + rbd plugins: parent_cache + immutable_object_cache_path: /tmp/ceph-immutable-object-cache + immutable_object_cache_max_size: 10G +- immutable_object_cache: + client.0: diff --git a/qa/suites/rbd/immutable-object-cache/supported-random-distro$ b/qa/suites/rbd/immutable-object-cache/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/workloads/.qa b/qa/suites/rbd/immutable-object-cache/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/immutable-object-cache/workloads/c_api_tests_with_defaults.yaml new file mode 120000 index 000000000..949032725 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1 @@ +../../librbd/workloads/c_api_tests_with_defaults.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/workloads/fio_on_immutable_object_cache.yaml b/qa/suites/rbd/immutable-object-cache/workloads/fio_on_immutable_object_cache.yaml new file mode 100644 index 000000000..08d76ee15 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/fio_on_immutable_object_cache.yaml @@ -0,0 +1,11 @@ +tasks: +- rbd_fio: + client.0: + thick-provision: true + fio-io-size: 100% + formats: [2] + features: [[layering]] + io-engine: rbd + test-clone-io: 1 + rw: randread + runtime: 600 diff --git a/qa/suites/rbd/immutable-object-cache/workloads/qemu_on_immutable_object_cache_and_thrash.yaml b/qa/suites/rbd/immutable-object-cache/workloads/qemu_on_immutable_object_cache_and_thrash.yaml new file mode 100644 index 000000000..33a5cf0b1 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/qemu_on_immutable_object_cache_and_thrash.yaml @@ -0,0 +1,11 @@ +tasks: +- qemu: + client.0: + clone: true + test: qa/run_xfstests_qemu.sh + type: block + cpus: 4 + memory: 4096 + disks: 3 +- immutable_object_cache_thrash: + client.0: diff --git a/qa/suites/rbd/iscsi/% b/qa/suites/rbd/iscsi/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/iscsi/% diff --git a/qa/suites/rbd/iscsi/.qa b/qa/suites/rbd/iscsi/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/0-single-container-host.yaml b/qa/suites/rbd/iscsi/0-single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/rbd/iscsi/0-single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/base/.qa b/qa/suites/rbd/iscsi/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/base/install.yaml b/qa/suites/rbd/iscsi/base/install.yaml new file mode 100644 index 000000000..5c5a6c31f --- /dev/null +++ b/qa/suites/rbd/iscsi/base/install.yaml @@ -0,0 +1,14 @@ +use_shaman: True +tasks: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- install: + extra_packages: + - iscsi-initiator-utils + - device-mapper-multipath diff --git a/qa/suites/rbd/iscsi/cluster/+ b/qa/suites/rbd/iscsi/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/+ diff --git a/qa/suites/rbd/iscsi/cluster/.qa b/qa/suites/rbd/iscsi/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/cluster/fixed-3.yaml b/qa/suites/rbd/iscsi/cluster/fixed-3.yaml new file mode 100644 index 000000000..426ea32cf --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/fixed-3.yaml @@ -0,0 +1,19 @@ +roles: +- - host.a + - mon.a + - mgr.x + - osd.0 + - osd.1 + - client.0 + - ceph.iscsi.iscsi.a +- - mon.b + - osd.2 + - osd.3 + - osd.4 + - client.1 +- - mon.c + - osd.5 + - osd.6 + - osd.7 + - client.2 + - ceph.iscsi.iscsi.b diff --git a/qa/suites/rbd/iscsi/cluster/openstack.yaml b/qa/suites/rbd/iscsi/cluster/openstack.yaml new file mode 100644 index 000000000..40fef4770 --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/iscsi/conf b/qa/suites/rbd/iscsi/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/iscsi/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/workloads/.qa b/qa/suites/rbd/iscsi/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/workloads/cephadm_iscsi.yaml b/qa/suites/rbd/iscsi/workloads/cephadm_iscsi.yaml new file mode 100644 index 000000000..202e6d8b5 --- /dev/null +++ b/qa/suites/rbd/iscsi/workloads/cephadm_iscsi.yaml @@ -0,0 +1,21 @@ +tasks: +- ceph_iscsi_client: + clients: [client.1] +- cram: + parallel: False + clients: + client.0: + - src/test/cli-integration/rbd/gwcli_create.t + client.1: + - src/test/cli-integration/rbd/iscsi_client.t + client.2: + - src/test/cli-integration/rbd/gwcli_delete.t +- cram: + parallel: False + clients: + client.0: + - src/test/cli-integration/rbd/rest_api_create.t + client.1: + - src/test/cli-integration/rbd/iscsi_client.t + client.2: + - src/test/cli-integration/rbd/rest_api_delete.t diff --git a/qa/suites/rbd/librbd/% b/qa/suites/rbd/librbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/% diff --git a/qa/suites/rbd/librbd/.qa b/qa/suites/rbd/librbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/cache/.qa b/qa/suites/rbd/librbd/cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/cache/none.yaml b/qa/suites/rbd/librbd/cache/none.yaml new file mode 100644 index 000000000..42fd9c955 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/librbd/cache/writearound.yaml b/qa/suites/rbd/librbd/cache/writearound.yaml new file mode 100644 index 000000000..b6f8e319b --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writearound.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/librbd/cache/writeback.yaml b/qa/suites/rbd/librbd/cache/writeback.yaml new file mode 100644 index 000000000..a55ec1df0 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writeback.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/librbd/cache/writethrough.yaml b/qa/suites/rbd/librbd/cache/writethrough.yaml new file mode 100644 index 000000000..6dc29e16c --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/librbd/clusters/+ b/qa/suites/rbd/librbd/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/+ diff --git a/qa/suites/rbd/librbd/clusters/.qa b/qa/suites/rbd/librbd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/clusters/fixed-3.yaml b/qa/suites/rbd/librbd/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/clusters/openstack.yaml b/qa/suites/rbd/librbd/clusters/openstack.yaml new file mode 100644 index 000000000..b0f3b9b4d --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/librbd/conf b/qa/suites/rbd/librbd/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/librbd/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/data-pool b/qa/suites/rbd/librbd/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/librbd/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/extra-conf/.qa b/qa/suites/rbd/librbd/extra-conf/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/extra-conf/copy-on-read.yaml b/qa/suites/rbd/librbd/extra-conf/copy-on-read.yaml new file mode 100644 index 000000000..ce99e7ec0 --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/copy-on-read.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd clone copy on read: true diff --git a/qa/suites/rbd/librbd/extra-conf/none.yaml b/qa/suites/rbd/librbd/extra-conf/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/none.yaml diff --git a/qa/suites/rbd/librbd/extra-conf/permit-partial-discard.yaml b/qa/suites/rbd/librbd/extra-conf/permit-partial-discard.yaml new file mode 100644 index 000000000..a99294696 --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/permit-partial-discard.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd skip partial discard: false diff --git a/qa/suites/rbd/librbd/min-compat-client/.qa b/qa/suites/rbd/librbd/min-compat-client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/min-compat-client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/min-compat-client/default.yaml b/qa/suites/rbd/librbd/min-compat-client/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/min-compat-client/default.yaml diff --git a/qa/suites/rbd/librbd/min-compat-client/octopus.yaml b/qa/suites/rbd/librbd/min-compat-client/octopus.yaml new file mode 100644 index 000000000..1cd2df483 --- /dev/null +++ b/qa/suites/rbd/librbd/min-compat-client/octopus.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + client.0: + - sudo ceph osd set-require-min-compat-client octopus diff --git a/qa/suites/rbd/librbd/msgr-failures/.qa b/qa/suites/rbd/librbd/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/msgr-failures/few.yaml b/qa/suites/rbd/librbd/msgr-failures/few.yaml new file mode 100644 index 000000000..df2a313a6 --- /dev/null +++ b/qa/suites/rbd/librbd/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - but it is still running + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/librbd/objectstore b/qa/suites/rbd/librbd/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/librbd/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/supported-random-distro$ b/qa/suites/rbd/librbd/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/librbd/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/workloads/.qa b/qa/suites/rbd/librbd/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml new file mode 100644 index 000000000..eb63fd771 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml new file mode 100644 index 000000000..ec4194598 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml new file mode 100644 index 000000000..6c3686806 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/librbd/workloads/fsx.yaml b/qa/suites/rbd/librbd/workloads/fsx.yaml new file mode 100644 index 000000000..6d8cd5f1a --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/fsx.yaml @@ -0,0 +1,4 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 20000 diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests.yaml new file mode 100644 index 000000000..516c323df --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests_with_defaults.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests_with_defaults.yaml new file mode 100644 index 000000000..831f3762b --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests_with_defaults.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests_with_journaling.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests_with_journaling.yaml new file mode 100644 index 000000000..8bd751146 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests_with_journaling.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/librbd/workloads/rbd_fio.yaml b/qa/suites/rbd/librbd/workloads/rbd_fio.yaml new file mode 100644 index 000000000..ff788c6a8 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/rbd_fio.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fio: + client.0: + fio-io-size: 80% + formats: [2] + features: 
[[layering],[layering,exclusive-lock,object-map]] + io-engine: rbd + test-clone-io: 1 + rw: randrw + runtime: 900 diff --git a/qa/suites/rbd/maintenance/% b/qa/suites/rbd/maintenance/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/maintenance/% diff --git a/qa/suites/rbd/maintenance/.qa b/qa/suites/rbd/maintenance/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/base/.qa b/qa/suites/rbd/maintenance/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/base/install.yaml b/qa/suites/rbd/maintenance/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/maintenance/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/maintenance/clusters/+ b/qa/suites/rbd/maintenance/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/+ diff --git a/qa/suites/rbd/maintenance/clusters/.qa b/qa/suites/rbd/maintenance/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/clusters/fixed-3.yaml b/qa/suites/rbd/maintenance/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/clusters/openstack.yaml b/qa/suites/rbd/maintenance/clusters/openstack.yaml new file mode 120000 index 000000000..3e5028f9a --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/openstack.yaml @@ -0,0 +1 @@ +../../qemu/clusters/openstack.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/conf b/qa/suites/rbd/maintenance/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/maintenance/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/objectstore b/qa/suites/rbd/maintenance/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/maintenance/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/qemu/.qa b/qa/suites/rbd/maintenance/qemu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/qemu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/qemu/xfstests.yaml b/qa/suites/rbd/maintenance/qemu/xfstests.yaml new file mode 100644 index 000000000..135103b34 --- /dev/null +++ b/qa/suites/rbd/maintenance/qemu/xfstests.yaml @@ -0,0 +1,14 @@ +tasks: +- parallel: + - io_workload + - op_workload +io_workload: + sequential: + - qemu: + client.0: + clone: true + type: block + disks: 3 + time_wait: 120 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/maintenance/supported-random-distro$ b/qa/suites/rbd/maintenance/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/maintenance/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/workloads/.qa b/qa/suites/rbd/maintenance/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/workloads/dynamic_features.yaml b/qa/suites/rbd/maintenance/workloads/dynamic_features.yaml new file mode 100644 index 000000000..d7e1c1ed0 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/dynamic_features.yaml @@ -0,0 +1,8 @@ +op_workload: + sequential: + - workunit: + clients: + client.0: + - rbd/qemu_dynamic_features.sh + env: + IMAGE_NAME: client.0.1-clone diff --git a/qa/suites/rbd/maintenance/workloads/dynamic_features_no_cache.yaml b/qa/suites/rbd/maintenance/workloads/dynamic_features_no_cache.yaml new file mode 100644 index 000000000..5d80e6bd3 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/dynamic_features_no_cache.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + rbd cache: false +op_workload: + sequential: + - workunit: + clients: + client.0: + - rbd/qemu_dynamic_features.sh + env: + IMAGE_NAME: client.0.1-clone + timeout: 0 diff --git a/qa/suites/rbd/maintenance/workloads/rebuild_object_map.yaml b/qa/suites/rbd/maintenance/workloads/rebuild_object_map.yaml new file mode 100644 index 000000000..308158f61 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/rebuild_object_map.yaml @@ -0,0 +1,8 @@ +op_workload: + sequential: + - workunit: + clients: + client.0: + - rbd/qemu_rebuild_object_map.sh + env: + IMAGE_NAME: client.0.1-clone diff --git a/qa/suites/rbd/migration/% b/qa/suites/rbd/migration/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/migration/% diff --git a/qa/suites/rbd/migration/.qa b/qa/suites/rbd/migration/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/1-base b/qa/suites/rbd/migration/1-base new file mode 120000 index 000000000..fd10a859d --- /dev/null +++ b/qa/suites/rbd/migration/1-base @@ -0,0 +1 @@ +../thrash/base
\ No newline at end of file diff --git a/qa/suites/rbd/migration/2-clusters/+ b/qa/suites/rbd/migration/2-clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/+ diff --git a/qa/suites/rbd/migration/2-clusters/.qa b/qa/suites/rbd/migration/2-clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/2-clusters/fixed-3.yaml b/qa/suites/rbd/migration/2-clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/migration/2-clusters/openstack.yaml b/qa/suites/rbd/migration/2-clusters/openstack.yaml new file mode 100644 index 000000000..9c39c7e5f --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 30000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/migration/3-objectstore b/qa/suites/rbd/migration/3-objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/migration/3-objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/migration/4-supported-random-distro$ b/qa/suites/rbd/migration/4-supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/migration/4-supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/migration/5-data-pool b/qa/suites/rbd/migration/5-data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/migration/5-data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/6-prepare/.qa b/qa/suites/rbd/migration/6-prepare/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/6-prepare/qcow2-file.yaml b/qa/suites/rbd/migration/6-prepare/qcow2-file.yaml new file mode 100644 index 000000000..8e3b9f958 --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/qcow2-file.yaml @@ -0,0 +1,9 @@ +tasks: + - exec: + client.0: + - mkdir /home/ubuntu/cephtest/migration + - wget -nv -O /home/ubuntu/cephtest/migration/base.client.0.qcow2 http://download.ceph.com/qa/ubuntu-12.04.qcow2 + - qemu-img create -f qcow2 /home/ubuntu/cephtest/migration/empty.qcow2 1G + - echo '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/base.client.0.qcow2"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.1 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.2 diff --git a/qa/suites/rbd/migration/6-prepare/qcow2-http.yaml b/qa/suites/rbd/migration/6-prepare/qcow2-http.yaml new file mode 100644 index 000000000..890d14417 --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/qcow2-http.yaml @@ -0,0 +1,8 @@ +tasks: + - exec: + client.0: + - mkdir /home/ubuntu/cephtest/migration + - qemu-img create -f qcow2 /home/ubuntu/cephtest/migration/empty.qcow2 1G + - echo '{"type":"qcow","stream":{"type":"http","url":"http://download.ceph.com/qa/ubuntu-12.04.qcow2"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.1 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.2 diff --git a/qa/suites/rbd/migration/6-prepare/raw-file.yaml b/qa/suites/rbd/migration/6-prepare/raw-file.yaml new file mode 100644 index 000000000..0035534ef --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/raw-file.yaml @@ -0,0 +1,10 @@ +tasks: + - exec: + client.0: + - mkdir /home/ubuntu/cephtest/migration + - wget -nv -O /home/ubuntu/cephtest/migration/base.client.0.qcow2 http://download.ceph.com/qa/ubuntu-12.04.qcow2 + - qemu-img convert -f qcow2 -O raw /home/ubuntu/cephtest/migration/base.client.0.qcow2 /home/ubuntu/cephtest/migration/base.client.0.raw + - dd if=/dev/zero of=/home/ubuntu/cephtest/migration/empty.raw count=1 bs=1G + - echo '{"type":"raw","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/base.client.0.raw"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0 + - rbd migration prepare --import-only --source-spec '{"type":"raw","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.raw"}}' client.0.1 + - rbd migration prepare --import-only --source-spec '{"type":"raw","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.raw"}}' client.0.2 diff --git a/qa/suites/rbd/migration/7-io-workloads/.qa b/qa/suites/rbd/migration/7-io-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/7-io-workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/7-io-workloads/qemu_xfstests.yaml b/qa/suites/rbd/migration/7-io-workloads/qemu_xfstests.yaml new file mode 100644 index 000000000..2617a4192 --- /dev/null +++ b/qa/suites/rbd/migration/7-io-workloads/qemu_xfstests.yaml @@ -0,0 +1,15 @@ +io_workload: + sequential: + - qemu: + client.0: + clone: true + type: block + disks: + - action: none + image_name: client.0.0 + - action: none + image_name: client.0.1 + - action: none + image_name: client.0.2 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/migration/8-migrate-workloads/.qa b/qa/suites/rbd/migration/8-migrate-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/8-migrate-workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/8-migrate-workloads/execute.yaml b/qa/suites/rbd/migration/8-migrate-workloads/execute.yaml new file mode 100644 index 000000000..d7735c426 --- /dev/null +++ b/qa/suites/rbd/migration/8-migrate-workloads/execute.yaml @@ -0,0 +1,14 @@ +tasks: + - parallel: + - io_workload + - migrate_workload +migrate_workload: + sequential: + - exec: + client.0: + - sleep 60 + - rbd migration execute client.0.0 + - sleep 60 + - rbd migration commit client.0.0 + - sleep 60 + - rbd migration execute client.0.1 diff --git a/qa/suites/rbd/migration/9-cleanup/.qa b/qa/suites/rbd/migration/9-cleanup/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/9-cleanup/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/9-cleanup/cleanup.yaml b/qa/suites/rbd/migration/9-cleanup/cleanup.yaml new file mode 100644 index 000000000..18c2bb5f4 --- /dev/null +++ b/qa/suites/rbd/migration/9-cleanup/cleanup.yaml @@ -0,0 +1,4 @@ +tasks: + - exec: + client.0: + - rm -rf /home/ubuntu/cephtest/migration diff --git a/qa/suites/rbd/migration/conf b/qa/suites/rbd/migration/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/migration/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/% b/qa/suites/rbd/mirror-thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/% diff --git a/qa/suites/rbd/mirror-thrash/.qa b/qa/suites/rbd/mirror-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/base/.qa b/qa/suites/rbd/mirror-thrash/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/base/install.yaml b/qa/suites/rbd/mirror-thrash/base/install.yaml new file mode 100644 index 000000000..365c3a8cb --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/base/install.yaml @@ -0,0 +1,9 @@ +meta: +- desc: run two ceph clusters and install rbd-mirror +tasks: +- install: + extra_packages: [rbd-mirror] +- ceph: + cluster: cluster1 +- ceph: + cluster: cluster2 diff --git a/qa/suites/rbd/mirror-thrash/clients/.qa b/qa/suites/rbd/mirror-thrash/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/clients/mirror.yaml b/qa/suites/rbd/mirror-thrash/clients/mirror.yaml new file mode 100644 index 000000000..1b6808d85 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/clients/mirror.yaml @@ -0,0 +1,36 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + rbd default features: 125 + debug rbd: 20 + debug rbd_mirror: 15 + log to stderr: false + # override to make these names predictable + client.mirror.0: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.1: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.2: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.3: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid +tasks: +- exec: + cluster1.client.mirror.0: + - "sudo ceph --cluster cluster1 auth caps client.mirror mon 'profile rbd-mirror-peer' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.0 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.1 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.2 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.3 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror mon 'profile rbd-mirror-peer' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.0 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.1 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.2 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.3 mon 'profile rbd-mirror' osd 'profile rbd'" diff --git a/qa/suites/rbd/mirror-thrash/cluster/+ b/qa/suites/rbd/mirror-thrash/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/+ diff --git a/qa/suites/rbd/mirror-thrash/cluster/.qa b/qa/suites/rbd/mirror-thrash/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/cluster/2-node.yaml b/qa/suites/rbd/mirror-thrash/cluster/2-node.yaml new file mode 100644 index 000000000..74f9fb3c4 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/2-node.yaml @@ -0,0 +1,31 @@ +meta: +- desc: 2 ceph clusters with 1 mon and 3 osds each +roles: +- - cluster1.mon.a + - cluster1.mgr.x + - cluster2.mgr.x + - cluster1.osd.0 + - cluster1.osd.1 + - cluster1.osd.2 + - cluster1.client.0 + - cluster2.client.0 +- - cluster2.mon.a + - cluster2.osd.0 + - cluster2.osd.1 + - cluster2.osd.2 + - cluster1.client.mirror + - cluster1.client.mirror.0 + - cluster1.client.mirror.1 + - cluster1.client.mirror.2 + - cluster1.client.mirror.3 + - cluster1.client.mirror.4 + - cluster1.client.mirror.5 + - cluster1.client.mirror.6 + - cluster2.client.mirror + - cluster2.client.mirror.0 + - cluster2.client.mirror.1 + - cluster2.client.mirror.2 + - cluster2.client.mirror.3 + - cluster2.client.mirror.4 + - cluster2.client.mirror.5 + - cluster2.client.mirror.6 diff --git a/qa/suites/rbd/mirror-thrash/cluster/openstack.yaml b/qa/suites/rbd/mirror-thrash/cluster/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/mirror-thrash/conf b/qa/suites/rbd/mirror-thrash/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/msgr-failures b/qa/suites/rbd/mirror-thrash/msgr-failures new file mode 120000 index 000000000..db59eb46c --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/msgr-failures @@ -0,0 +1 @@ +../basic/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/objectstore b/qa/suites/rbd/mirror-thrash/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/policy/.qa b/qa/suites/rbd/mirror-thrash/policy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/policy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/policy/none.yaml b/qa/suites/rbd/mirror-thrash/policy/none.yaml new file mode 100644 index 000000000..e0a7c1185 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/policy/none.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd mirror image policy type: none diff --git a/qa/suites/rbd/mirror-thrash/policy/simple.yaml b/qa/suites/rbd/mirror-thrash/policy/simple.yaml new file mode 100644 index 000000000..ee3082d3c --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/policy/simple.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd mirror image policy type: simple diff --git a/qa/suites/rbd/mirror-thrash/rbd-mirror/.qa b/qa/suites/rbd/mirror-thrash/rbd-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/rbd-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/rbd-mirror/four-per-cluster.yaml b/qa/suites/rbd/mirror-thrash/rbd-mirror/four-per-cluster.yaml new file mode 100644 index 000000000..70df34e40 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/rbd-mirror/four-per-cluster.yaml @@ -0,0 +1,31 @@ +meta: +- desc: run four rbd-mirror daemons per cluster +tasks: +- rbd-mirror: + client: cluster1.client.mirror.0 + thrash: True +- rbd-mirror: + client: cluster1.client.mirror.1 + thrash: True +- rbd-mirror: + client: cluster1.client.mirror.2 + thrash: True +- rbd-mirror: + client: cluster1.client.mirror.3 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.0 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.1 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.2 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.3 + thrash: True +- rbd-mirror-thrash: + cluster: cluster1 +- rbd-mirror-thrash: + cluster: cluster2 diff --git a/qa/suites/rbd/mirror-thrash/supported-random-distro$ b/qa/suites/rbd/mirror-thrash/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/workloads/.qa b/qa/suites/rbd/mirror-thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml new file mode 100644 index 000000000..d2db0f520 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml @@ -0,0 +1,33 @@ +meta: +- desc: run multiple FSX workloads to simulate cluster load and then verify + that the images were replicated +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_fsx_prepare.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_NOCLEANUP: '1' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' +- rbd_fsx: + clients: + - cluster1.client.mirror.0 + - cluster1.client.mirror.1 + - cluster1.client.mirror.2 + - cluster1.client.mirror.3 + - cluster1.client.mirror.4 + - cluster1.client.mirror.5 + ops: 6000 + keep_images: true + pool_name: mirror +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_fsx_compare.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-stress-workunit.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-stress-workunit.yaml new file mode 100644 index 000000000..9579b70d6 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-stress-workunit.yaml @@ -0,0 +1,15 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + MIRROR_POOL_MODE: 'pool' + MIRROR_IMAGE_MODE: 'journal' + timeout: 6h diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-workunit.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-workunit.yaml new file mode 100644 index 000000000..5f12b2239 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-workunit.yaml @@ -0,0 +1,12 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-exclusive-lock.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-exclusive-lock.yaml new file mode 100644 index 000000000..87632483d --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-exclusive-lock.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git 
a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-fast-diff.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-fast-diff.yaml new file mode 100644 index 000000000..fc43b0ec2 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-fast-diff.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock,object-map,fast-diff' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-minimum.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-minimum.yaml new file mode 100644 index 000000000..af0ea1240 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-minimum.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' + RBD_IMAGE_FEATURES: 'layering' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror/% b/qa/suites/rbd/mirror/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror/% diff --git a/qa/suites/rbd/mirror/.qa b/qa/suites/rbd/mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/base b/qa/suites/rbd/mirror/base new file mode 120000 index 000000000..8d9546e21 --- /dev/null +++ b/qa/suites/rbd/mirror/base @@ -0,0 +1 @@ +../mirror-thrash/base
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/clients/+ b/qa/suites/rbd/mirror/clients/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror/clients/+ diff --git a/qa/suites/rbd/mirror/clients/.qa b/qa/suites/rbd/mirror/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/clients/mirror-extra.yaml b/qa/suites/rbd/mirror/clients/mirror-extra.yaml new file mode 100644 index 000000000..5eed5cfcd --- /dev/null +++ b/qa/suites/rbd/mirror/clients/mirror-extra.yaml @@ -0,0 +1,24 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + # override to make these names predictable + client.mirror.4: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.5: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.6: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid +tasks: +- exec: + cluster1.client.mirror.0: + - "sudo ceph --cluster cluster1 auth caps client.mirror.4 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.5 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.6 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.4 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.5 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.6 mon 'profile rbd-mirror' osd 'profile rbd'" diff --git a/qa/suites/rbd/mirror/clients/mirror.yaml b/qa/suites/rbd/mirror/clients/mirror.yaml new file mode 120000 index 000000000..0dfb0c785 --- /dev/null +++ b/qa/suites/rbd/mirror/clients/mirror.yaml @@ -0,0 +1 @@ +../../mirror-thrash/clients/mirror.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/cluster b/qa/suites/rbd/mirror/cluster new file mode 120000 index 000000000..3fc87a150 --- /dev/null +++ b/qa/suites/rbd/mirror/cluster @@ -0,0 +1 @@ +../mirror-thrash/cluster
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/conf b/qa/suites/rbd/mirror/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/mirror/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/msgr-failures b/qa/suites/rbd/mirror/msgr-failures new file mode 120000 index 000000000..728aeab33 --- /dev/null +++ b/qa/suites/rbd/mirror/msgr-failures @@ -0,0 +1 @@ +../mirror-thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/objectstore b/qa/suites/rbd/mirror/objectstore new file mode 120000 index 000000000..d751ff121 --- /dev/null +++ b/qa/suites/rbd/mirror/objectstore @@ -0,0 +1 @@ +../mirror-thrash/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/supported-random-distro$ b/qa/suites/rbd/mirror/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/mirror/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/workloads/.qa b/qa/suites/rbd/mirror/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-ha-workunit.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-ha-workunit.yaml new file mode 100644 index 000000000..7347f7f76 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-ha-workunit.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_ha.sh workunit to test the rbd-mirror daemon +overrides: + ceph: + conf: + client: + rbd mirror image policy type: none +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_ha.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-journal-bootstrap-workunit.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-journal-bootstrap-workunit.yaml new file mode 100644 index 000000000..b9c5562be --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-journal-bootstrap-workunit.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_bootstrap.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_bootstrap.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '1' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + MIRROR_POOL_MODE: 'pool' + MIRROR_IMAGE_MODE: 'journal' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-bootstrap-workunit.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-bootstrap-workunit.yaml new file mode 100644 index 000000000..5ad78474d --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-bootstrap-workunit.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_bootstrap.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_bootstrap.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '1' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-exclusive-lock.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-exclusive-lock.yaml new file mode 100644 index 000000000..29047a77d --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-exclusive-lock.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-fast-diff.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-fast-diff.yaml new file mode 100644 index 000000000..af13c92b5 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-fast-diff.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock,object-map,fast-diff' 
diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-journaling.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-journaling.yaml new file mode 100644 index 000000000..5ea2bb105 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-journaling.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock,journaling' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-minimum.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-minimum.yaml new file mode 100644 index 000000000..e21d57b2b --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-minimum.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-stress-workunit-min-compat-client-octopus.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-stress-workunit-min-compat-client-octopus.yaml new file mode 100644 index 000000000..5cc351bb9 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-stress-workunit-min-compat-client-octopus.yaml @@ -0,0 +1,11 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_MIN_COMPAT_CLIENT: 'octopus' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-config-key.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-config-key.yaml new file mode 100644 index 000000000..0102050eb --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-config-key.yaml @@ -0,0 +1,12 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-min-compat-client-octopus.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-min-compat-client-octopus.yaml new file mode 100644 index 000000000..5bd024d2d --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-min-compat-client-octopus.yaml @@ -0,0 +1,11 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_MIN_COMPAT_CLIENT: 'octopus' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-none.yaml 
b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-none.yaml new file mode 100644 index 000000000..0a610ea2f --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-none.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +overrides: + ceph: + conf: + client: + rbd mirror image policy type: none +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-simple.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-simple.yaml new file mode 100644 index 000000000..883e8abd3 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-simple.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +overrides: + ceph: + conf: + client: + rbd mirror image policy type: simple +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' diff --git a/qa/suites/rbd/nbd/% b/qa/suites/rbd/nbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/nbd/% diff --git a/qa/suites/rbd/nbd/.qa b/qa/suites/rbd/nbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/nbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/base b/qa/suites/rbd/nbd/base new file mode 120000 index 000000000..fd10a859d --- /dev/null +++ b/qa/suites/rbd/nbd/base @@ -0,0 +1 @@ +../thrash/base
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/+ b/qa/suites/rbd/nbd/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/+ diff --git a/qa/suites/rbd/nbd/cluster/.qa b/qa/suites/rbd/nbd/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/fixed-3.yaml b/qa/suites/rbd/nbd/cluster/fixed-3.yaml new file mode 100644 index 000000000..182589152 --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/rbd/nbd/cluster/openstack.yaml b/qa/suites/rbd/nbd/cluster/openstack.yaml new file mode 120000 index 000000000..48becbb83 --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/openstack.yaml @@ -0,0 +1 @@ +../../thrash/clusters/openstack.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/conf b/qa/suites/rbd/nbd/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/nbd/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/msgr-failures b/qa/suites/rbd/nbd/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rbd/nbd/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/objectstore b/qa/suites/rbd/nbd/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/nbd/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/supported-random-distro$ b/qa/suites/rbd/nbd/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/nbd/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/thrashers b/qa/suites/rbd/nbd/thrashers new file mode 120000 index 000000000..f461dadc3 --- /dev/null +++ b/qa/suites/rbd/nbd/thrashers @@ -0,0 +1 @@ +../thrash/thrashers
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/thrashosds-health.yaml b/qa/suites/rbd/nbd/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rbd/nbd/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/workloads/.qa b/qa/suites/rbd/nbd/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml b/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml new file mode 100644 index 000000000..b5737671f --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 + nbd: True + holebdy: 512 + punch_holes: true + readbdy: 512 + truncbdy: 512 + writebdy: 512 diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml b/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml new file mode 100644 index 000000000..ededea024 --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml @@ -0,0 +1,9 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- workunit: + clients: + client.0: + - rbd/rbd-nbd.sh diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml b/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml new file mode 100644 index 000000000..e0a7ebe33 --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: + - rbd-nbd + extra_system_packages: + - pv +tasks: +- workunit: + clients: + client.0: + - rbd/diff_continuous.sh + env: + RBD_DEVICE_TYPE: "nbd" diff --git a/qa/suites/rbd/pwl-cache/.qa b/qa/suites/rbd/pwl-cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/% b/qa/suites/rbd/pwl-cache/home/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/% diff --git a/qa/suites/rbd/pwl-cache/home/.qa b/qa/suites/rbd/pwl-cache/home/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/1-base b/qa/suites/rbd/pwl-cache/home/1-base new file mode 120000 index 000000000..89c3c7e84 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/1-base @@ -0,0 +1 @@ +../../basic/base/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/+ b/qa/suites/rbd/pwl-cache/home/2-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/+ diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/.qa b/qa/suites/rbd/pwl-cache/home/2-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/fix-2.yaml b/qa/suites/rbd/pwl-cache/home/2-cluster/fix-2.yaml new file mode 100644 index 000000000..dbccecbce --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/fix-2.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1] +- [mon.b, mgr.y, osd.2, osd.3, client.0] diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/openstack.yaml b/qa/suites/rbd/pwl-cache/home/2-cluster/openstack.yaml new file mode 100644 index 000000000..b113e4f2e --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rbd/pwl-cache/home/3-supported-random-distro$ b/qa/suites/rbd/pwl-cache/home/3-supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/3-supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/4-cache-path.yaml b/qa/suites/rbd/pwl-cache/home/4-cache-path.yaml new file mode 100644 index 000000000..be4641b01 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/4-cache-path.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_path: /home/ubuntu/cephtest/rbd-pwl-cache + rbd_plugins: pwl_cache +tasks: +- exec: + client.0: + - "mkdir -m 777 /home/ubuntu/cephtest/rbd-pwl-cache" +- exec_on_cleanup: + client.0: + - "rm -rf /home/ubuntu/cephtest/rbd-pwl-cache" diff --git a/qa/suites/rbd/pwl-cache/home/5-cache-mode/.qa b/qa/suites/rbd/pwl-cache/home/5-cache-mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/5-cache-mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/5-cache-mode/rwl.yaml b/qa/suites/rbd/pwl-cache/home/5-cache-mode/rwl.yaml new file mode 100644 index 000000000..5aeab26b3 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/5-cache-mode/rwl.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: rwl diff --git a/qa/suites/rbd/pwl-cache/home/5-cache-mode/ssd.yaml b/qa/suites/rbd/pwl-cache/home/5-cache-mode/ssd.yaml new file mode 100644 index 000000000..082149147 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/5-cache-mode/ssd.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: ssd diff --git a/qa/suites/rbd/pwl-cache/home/6-cache-size/.qa b/qa/suites/rbd/pwl-cache/home/6-cache-size/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/6-cache-size/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/6-cache-size/1G.yaml b/qa/suites/rbd/pwl-cache/home/6-cache-size/1G.yaml new file mode 100644 index 000000000..53fcddcdf --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/6-cache-size/1G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 1073741824 diff --git a/qa/suites/rbd/pwl-cache/home/6-cache-size/8G.yaml b/qa/suites/rbd/pwl-cache/home/6-cache-size/8G.yaml new file mode 100644 index 000000000..b53d36852 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/6-cache-size/8G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 8589934592 diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/.qa b/qa/suites/rbd/pwl-cache/home/7-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/.qa @@ -0,0 +1 @@ +../.qa/
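The rbd_persistent_cache_size values in the 1G and 8G fragments above are raw byte counts. As a quick sanity check of those numbers (a throwaway sketch, assuming 1 GiB = 2**30 bytes; the helper is hypothetical and not used by any suite):

# Hypothetical helper: convert GiB to the byte counts used in the
# cache-size fragments of this suite.
def gib(n):
    return n * 2**30

assert gib(1) == 1073741824   # 1G.yaml
assert gib(8) == 8589934592   # 8G.yaml
assert gib(5) == 5368709120   # tmpfs/6-cache-size/5G.yaml, later in this series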
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/c_api_tests_with_defaults.yaml new file mode 120000 index 000000000..359001f8f --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1 @@ +../../../librbd/workloads/c_api_tests_with_defaults.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/fio.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/fio.yaml new file mode 100644 index 000000000..f7aecce8a --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/fio.yaml @@ -0,0 +1,8 @@ +tasks: +- rbd_fio: + client.0: + fio-io-size: 100% + formats: [2] + io-engine: rbd + rw: randwrite + runtime: 600 diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml new file mode 100644 index 000000000..3017beb22 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml @@ -0,0 +1,9 @@ +tasks: +- rbd.create_image: + client.0: + image_name: testimage + image_size: 10240 + image_format: 2 +- rbd_pwl_cache_recovery: + client.0: + image_name: testimage diff --git a/qa/suites/rbd/pwl-cache/home/conf b/qa/suites/rbd/pwl-cache/home/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/% b/qa/suites/rbd/pwl-cache/tmpfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/% diff --git a/qa/suites/rbd/pwl-cache/tmpfs/.qa b/qa/suites/rbd/pwl-cache/tmpfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/1-base b/qa/suites/rbd/pwl-cache/tmpfs/1-base new file mode 120000 index 000000000..89c3c7e84 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/1-base @@ -0,0 +1 @@ +../../basic/base/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/+ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/+ diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/.qa b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/fix-2.yaml b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/fix-2.yaml new file mode 100644 index 000000000..dbccecbce --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/fix-2.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1] +- [mon.b, mgr.y, osd.2, osd.3, client.0] diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/openstack.yaml b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/openstack.yaml new file mode 100644 index 000000000..b113e4f2e --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rbd/pwl-cache/tmpfs/3-supported-random-distro$ b/qa/suites/rbd/pwl-cache/tmpfs/3-supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/3-supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/4-cache-path.yaml b/qa/suites/rbd/pwl-cache/tmpfs/4-cache-path.yaml new file mode 100644 index 000000000..b5578a0ae --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/4-cache-path.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_path: /home/ubuntu/cephtest/rbd-pwl-cache + rbd_plugins: pwl_cache +tasks: +- exec: + client.0: + - "mkdir /home/ubuntu/cephtest/tmpfs" + - "mkdir /home/ubuntu/cephtest/rbd-pwl-cache" + - "sudo mount -t tmpfs -o size=20G tmpfs /home/ubuntu/cephtest/tmpfs" + - "truncate -s 20G /home/ubuntu/cephtest/tmpfs/loopfile" + - "mkfs.ext4 /home/ubuntu/cephtest/tmpfs/loopfile" + - "sudo mount -o loop /home/ubuntu/cephtest/tmpfs/loopfile /home/ubuntu/cephtest/rbd-pwl-cache" + - "sudo chmod 777 /home/ubuntu/cephtest/rbd-pwl-cache" +- exec_on_cleanup: + client.0: + - "sudo umount /home/ubuntu/cephtest/rbd-pwl-cache" + - "sudo umount /home/ubuntu/cephtest/tmpfs" + - "rm -rf /home/ubuntu/cephtest/rbd-pwl-cache" + - "rm -rf /home/ubuntu/cephtest/tmpfs" diff --git a/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/.qa b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/rwl.yaml b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/rwl.yaml new file mode 100644 index 000000000..5aeab26b3 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/rwl.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: rwl diff --git a/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/ssd.yaml b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/ssd.yaml new file mode 100644 index 000000000..082149147 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/ssd.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: ssd diff --git a/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/.qa b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/1G.yaml b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/1G.yaml new file mode 100644 index 000000000..53fcddcdf --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/1G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 1073741824 diff --git a/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/5G.yaml b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/5G.yaml new file mode 100644 index 000000000..1c43b5de8 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/5G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 5368709120 diff --git a/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/.qa b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/qemu_xfstests.yaml b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/qemu_xfstests.yaml new file mode 100644 index 000000000..255b9631e --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/qemu_xfstests.yaml @@ -0,0 +1,8 @@ +tasks: +- qemu: + client.0: + test: qa/run_xfstests_qemu.sh + type: block + cpus: 4 + memory: 4096 + disks: 3 diff --git a/qa/suites/rbd/pwl-cache/tmpfs/conf b/qa/suites/rbd/pwl-cache/tmpfs/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/% b/qa/suites/rbd/qemu/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/qemu/% diff --git a/qa/suites/rbd/qemu/.qa b/qa/suites/rbd/qemu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/cache/.qa b/qa/suites/rbd/qemu/cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/cache/none.yaml b/qa/suites/rbd/qemu/cache/none.yaml new file mode 100644 index 000000000..42fd9c955 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/qemu/cache/writearound.yaml b/qa/suites/rbd/qemu/cache/writearound.yaml new file mode 100644 index 000000000..b6f8e319b --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writearound.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/qemu/cache/writeback.yaml b/qa/suites/rbd/qemu/cache/writeback.yaml new file mode 100644 index 000000000..a55ec1df0 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writeback.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/qemu/cache/writethrough.yaml b/qa/suites/rbd/qemu/cache/writethrough.yaml new file mode 100644 index 000000000..6dc29e16c --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/qemu/clusters/+ b/qa/suites/rbd/qemu/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/+ diff --git a/qa/suites/rbd/qemu/clusters/.qa b/qa/suites/rbd/qemu/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/clusters/fixed-3.yaml b/qa/suites/rbd/qemu/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/clusters/openstack.yaml b/qa/suites/rbd/qemu/clusters/openstack.yaml new file mode 100644 index 000000000..9c39c7e5f --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 30000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/qemu/conf b/qa/suites/rbd/qemu/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/qemu/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/data-pool b/qa/suites/rbd/qemu/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/qemu/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/features/.qa b/qa/suites/rbd/qemu/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/features/defaults.yaml b/qa/suites/rbd/qemu/features/defaults.yaml new file mode 100644 index 000000000..75afd68dd --- /dev/null +++ b/qa/suites/rbd/qemu/features/defaults.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 61 diff --git a/qa/suites/rbd/qemu/features/journaling.yaml b/qa/suites/rbd/qemu/features/journaling.yaml new file mode 100644 index 000000000..6cea62a88 --- /dev/null +++ b/qa/suites/rbd/qemu/features/journaling.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 125 diff --git a/qa/suites/rbd/qemu/features/readbalance.yaml b/qa/suites/rbd/qemu/features/readbalance.yaml new file mode 100644 index 000000000..e3812f4c7 --- /dev/null +++ b/qa/suites/rbd/qemu/features/readbalance.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + client: + rbd read from replica policy: balance + +tasks: +- exec: + osd.0: + - ceph osd set-require-min-compat-client octopus diff --git a/qa/suites/rbd/qemu/msgr-failures/.qa b/qa/suites/rbd/qemu/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/msgr-failures/few.yaml b/qa/suites/rbd/qemu/msgr-failures/few.yaml new file mode 100644 index 000000000..ca8e09853 --- /dev/null +++ b/qa/suites/rbd/qemu/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - but it is still running + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/qemu/objectstore b/qa/suites/rbd/qemu/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/qemu/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/supported-random-distro$ b/qa/suites/rbd/qemu/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/qemu/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/workloads/.qa b/qa/suites/rbd/qemu/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml b/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml new file mode 100644 index 000000000..0ef9ebb65 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + clone: true + test: qa/workunits/suites/bonnie.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml b/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml new file mode 100644 index 000000000..95f514805 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + clone: true + test: qa/workunits/suites/fsstress.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled b/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled new file mode 100644 index 000000000..e159e208e --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + test: qa/workunits/suites/iozone.sh + image_size: 20480 +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml b/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml new file mode 100644 index 000000000..198f798d4 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml @@ -0,0 +1,8 @@ +tasks: +- qemu: + all: + clone: true + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/singleton-bluestore/% b/qa/suites/rbd/singleton-bluestore/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/% diff --git a/qa/suites/rbd/singleton-bluestore/.qa b/qa/suites/rbd/singleton-bluestore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/all/.qa b/qa/suites/rbd/singleton-bluestore/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml b/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml new file mode 100644 index 000000000..48156c7a0 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.y, osd.3, osd.4, osd.5] +- [mon.c, mgr.z, osd.6, osd.7, osd.8] +- [osd.9, osd.10, osd.11] +tasks: +- install: +- ceph: +- workunit: + timeout: 30m + clients: + all: [rbd/issue-20295.sh] diff --git a/qa/suites/rbd/singleton-bluestore/conf b/qa/suites/rbd/singleton-bluestore/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/objectstore/.qa b/qa/suites/rbd/singleton-bluestore/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml new file mode 120000 index 000000000..888caf55f --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-comp-snappy.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/openstack.yaml b/qa/suites/rbd/singleton-bluestore/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/singleton-bluestore/supported-random-distro$ b/qa/suites/rbd/singleton-bluestore/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/% b/qa/suites/rbd/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/singleton/% diff --git a/qa/suites/rbd/singleton/.qa b/qa/suites/rbd/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/all/.qa b/qa/suites/rbd/singleton/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/all/admin_socket.yaml b/qa/suites/rbd/singleton/all/admin_socket.yaml new file mode 100644 index 000000000..22dbd8c03 --- /dev/null +++ b/qa/suites/rbd/singleton/all/admin_socket.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/test_admin_socket.sh] diff --git a/qa/suites/rbd/singleton/all/formatted-output.yaml b/qa/suites/rbd/singleton/all/formatted-output.yaml new file mode 100644 index 000000000..7be94ef23 --- /dev/null +++ b/qa/suites/rbd/singleton/all/formatted-output.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/formatted-output.t diff --git a/qa/suites/rbd/singleton/all/merge_diff.yaml b/qa/suites/rbd/singleton/all/merge_diff.yaml new file mode 100644 index 000000000..31b269d63 --- /dev/null +++ b/qa/suites/rbd/singleton/all/merge_diff.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/merge_diff.sh] diff --git a/qa/suites/rbd/singleton/all/mon-command-help.yaml b/qa/suites/rbd/singleton/all/mon-command-help.yaml new file mode 100644 index 000000000..a7290aadb --- /dev/null +++ b/qa/suites/rbd/singleton/all/mon-command-help.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/mon-command-help.t diff --git a/qa/suites/rbd/singleton/all/permissions.yaml b/qa/suites/rbd/singleton/all/permissions.yaml new file mode 100644 index 000000000..c00a5c9b8 --- /dev/null +++ b/qa/suites/rbd/singleton/all/permissions.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/permissions.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml new file mode 100644 index 000000000..364dd8810 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml @@ -0,0 +1,19 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: false +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writearound.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writearound.yaml new file mode 100644 index 000000000..975708385 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writearound.yaml @@ -0,0 +1,20 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache policy: writearound +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml new file mode 100644 index 000000000..9d078c33d --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml @@ -0,0 +1,20 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + 
extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache policy: writeback +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml new file mode 100644 index 000000000..c5250bb90 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml @@ -0,0 +1,20 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache max dirty: 0 +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qos.yaml b/qa/suites/rbd/singleton/all/qos.yaml new file mode 100644 index 000000000..66b90520b --- /dev/null +++ b/qa/suites/rbd/singleton/all/qos.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/qos.sh] diff --git a/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml b/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml new file mode 100644 index 000000000..f14bd7431 --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd validate pool: false +- workunit: + clients: + all: + - mon/rbd_snaps_ops.sh + diff --git a/qa/suites/rbd/singleton/all/rbd_mirror.yaml b/qa/suites/rbd/singleton/all/rbd_mirror.yaml new file mode 100644 index 000000000..4120d21c9 --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbd_mirror.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) +- workunit: + clients: + all: [rbd/test_rbd_mirror.sh] diff --git a/qa/suites/rbd/singleton/all/rbd_tasks.yaml b/qa/suites/rbd/singleton/all/rbd_tasks.yaml new file mode 100644 index 000000000..782b02141 --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbd_tasks.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/test_rbd_tasks.sh] diff --git a/qa/suites/rbd/singleton/all/rbdmap_RBDMAPFILE.yaml b/qa/suites/rbd/singleton/all/rbdmap_RBDMAPFILE.yaml new file mode 100644 index 000000000..0053e66ba --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbdmap_RBDMAPFILE.yaml @@ -0,0 +1,7 @@ +roles: +- [client.0] +tasks: +- install: +- workunit: + clients: + all: [rbd/test_rbdmap_RBDMAPFILE.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml b/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml new file mode 100644 index 000000000..cf602cbb1 --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd cache: false +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-writeback.yaml b/qa/suites/rbd/singleton/all/read-flags-writeback.yaml new file mode 100644 index 000000000..ba90c1d1e --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-writeback.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, 
client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache policy: writeback +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml b/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml new file mode 100644 index 000000000..fc499d495 --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache max dirty: 0 +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/snap-diff.yaml b/qa/suites/rbd/singleton/all/snap-diff.yaml new file mode 100644 index 000000000..be7e68589 --- /dev/null +++ b/qa/suites/rbd/singleton/all/snap-diff.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/snap-diff.t diff --git a/qa/suites/rbd/singleton/all/verify_pool.yaml b/qa/suites/rbd/singleton/all/verify_pool.yaml new file mode 100644 index 000000000..5ab06f749 --- /dev/null +++ b/qa/suites/rbd/singleton/all/verify_pool.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/verify_pool.sh] diff --git a/qa/suites/rbd/singleton/conf b/qa/suites/rbd/singleton/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/singleton/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/objectstore b/qa/suites/rbd/singleton/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/singleton/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/openstack.yaml b/qa/suites/rbd/singleton/openstack.yaml new file mode 100644 index 000000000..21eca2bbd --- /dev/null +++ b/qa/suites/rbd/singleton/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 30 # GB diff --git a/qa/suites/rbd/singleton/supported-random-distro$ b/qa/suites/rbd/singleton/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/singleton/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/% b/qa/suites/rbd/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/thrash/% diff --git a/qa/suites/rbd/thrash/.qa b/qa/suites/rbd/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/base/.qa b/qa/suites/rbd/thrash/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/base/install.yaml b/qa/suites/rbd/thrash/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/thrash/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/thrash/clusters/+ b/qa/suites/rbd/thrash/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/+ diff --git a/qa/suites/rbd/thrash/clusters/.qa b/qa/suites/rbd/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/clusters/fixed-2.yaml b/qa/suites/rbd/thrash/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/clusters/openstack.yaml b/qa/suites/rbd/thrash/clusters/openstack.yaml new file mode 100644 index 000000000..40fef4770 --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/thrash/conf b/qa/suites/rbd/thrash/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/thrash/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/msgr-failures/.qa b/qa/suites/rbd/thrash/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/msgr-failures/few.yaml b/qa/suites/rbd/thrash/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/thrash/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/thrash/objectstore b/qa/suites/rbd/thrash/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/supported-random-distro$ b/qa/suites/rbd/thrash/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/thrash/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/thrashers/.qa b/qa/suites/rbd/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/thrashers/default.yaml b/qa/suites/rbd/thrash/thrashers/default.yaml new file mode 100644 index 000000000..3e2bf7fe1 --- /dev/null +++ b/qa/suites/rbd/thrash/thrashers/default.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 diff --git a/qa/suites/rbd/thrash/thrashosds-health.yaml b/qa/suites/rbd/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rbd/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/workloads/.qa b/qa/suites/rbd/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/workloads/journal.yaml b/qa/suites/rbd/thrash/workloads/journal.yaml new file mode 100644 index 000000000..4dae10633 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/journal.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/journal.sh diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml new file mode 100644 index 000000000..ec4194598 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml new file mode 100644 index 000000000..26e20522c --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml @@ -0,0 +1,17 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) + conf: + client: + rbd clone copy on read: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml new file mode 100644 index 000000000..6c3686806 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml new file mode 100644 index 000000000..eb63fd771 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writearound.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writearound.yaml new file mode 100644 index 000000000..e9ea1ebd1 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writearound.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml new file mode 100644 index 000000000..41f7d84c9 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml new file mode 100644 index 000000000..463ba9965 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + 
clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml new file mode 100644 index 000000000..0c284ca8f --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd clone copy on read: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_deep_copy.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_deep_copy.yaml new file mode 100644 index 000000000..797491499 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_deep_copy.yaml @@ -0,0 +1,5 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 + deep_copy: True diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_journal.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_journal.yaml new file mode 100644 index 000000000..13e9a7830 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_journal.yaml @@ -0,0 +1,5 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 + journal_replay: True diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml new file mode 100644 index 000000000..968665e18 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml @@ -0,0 +1,9 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_rate_limit.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_rate_limit.yaml new file mode 100644 index 000000000..611320bca --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_rate_limit.yaml @@ -0,0 +1,11 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd qos iops limit: 50 + rbd qos iops burst: 100 + rbd qos schedule tick min: 100 diff --git a/qa/suites/rbd/valgrind/% b/qa/suites/rbd/valgrind/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/valgrind/% diff --git a/qa/suites/rbd/valgrind/.qa b/qa/suites/rbd/valgrind/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/base/.qa b/qa/suites/rbd/valgrind/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/base/install.yaml b/qa/suites/rbd/valgrind/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/valgrind/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/valgrind/centos_latest.yaml b/qa/suites/rbd/valgrind/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rbd/valgrind/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/clusters b/qa/suites/rbd/valgrind/clusters new file mode 120000 index 000000000..ae92569e8 --- /dev/null +++ b/qa/suites/rbd/valgrind/clusters @@ -0,0 +1 @@ +../basic/clusters
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/conf b/qa/suites/rbd/valgrind/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/valgrind/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/objectstore b/qa/suites/rbd/valgrind/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/valgrind/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/validator/.qa b/qa/suites/rbd/valgrind/validator/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/validator/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/validator/memcheck.yaml b/qa/suites/rbd/valgrind/validator/memcheck.yaml new file mode 100644 index 000000000..fcea1b88c --- /dev/null +++ b/qa/suites/rbd/valgrind/validator/memcheck.yaml @@ -0,0 +1,12 @@ +# see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126 +os_type: centos + +overrides: + install: + ceph: + debuginfo: true + rbd_fsx: + valgrind: ["--tool=memcheck"] + workunit: + env: + VALGRIND: "--tool=memcheck --leak-check=full" diff --git a/qa/suites/rbd/valgrind/workloads/.qa b/qa/suites/rbd/valgrind/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml new file mode 100644 index 000000000..eb63fd771 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml new file mode 100644 index 000000000..ec4194598 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml new file mode 100644 index 000000000..6c3686806 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/valgrind/workloads/fsx.yaml b/qa/suites/rbd/valgrind/workloads/fsx.yaml new file mode 100644 index 000000000..5c745a2c6 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/fsx.yaml @@ -0,0 +1,4 @@ +tasks: +- rbd_fsx: + clients: [client.0] + size: 134217728 diff --git a/qa/suites/rbd/valgrind/workloads/python_api_tests.yaml b/qa/suites/rbd/valgrind/workloads/python_api_tests.yaml new file mode 100644 index 000000000..516c323df --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/python_api_tests.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/valgrind/workloads/python_api_tests_with_defaults.yaml b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_defaults.yaml new file mode 100644 index 000000000..831f3762b --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_defaults.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/valgrind/workloads/python_api_tests_with_journaling.yaml b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_journaling.yaml new file mode 100644 index 000000000..8bd751146 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_journaling.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml b/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml new file mode 100644 index 000000000..ae5a83c30 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall 
HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_rbd_mirror.sh diff --git a/qa/suites/rgw-multisite-upgrade/.qa b/qa/suites/rgw-multisite-upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/% b/qa/suites/rgw-multisite-upgrade/pacific-x/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/% diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/.qa b/qa/suites/rgw-multisite-upgrade/pacific-x/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/clusters.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/clusters.yaml new file mode 100644 index 000000000..4e18096e0 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/clusters.yaml @@ -0,0 +1,3 @@ +roles: +- [c1.mon.a, c1.mgr.x, c1.osd.0, c1.osd.1, c1.osd.2, c1.client.0] +- [c2.mon.a, c2.mgr.x, c2.osd.0, c2.osd.1, c2.osd.2, c2.client.0, c2.client.1] diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/frontend.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/frontend.yaml new file mode 120000 index 000000000..09ced62c4 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/frontend.yaml @@ -0,0 +1 @@ +.qa/rgw_frontend/beast.yaml
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/overrides.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/overrides.yaml new file mode 100644 index 000000000..35ef026d6 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/overrides.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - \(PG_AVAILABILITY\) + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= + rgw crypt require ssl: false + rgw sync log trim interval: 0 + rgw curl low speed time: 300 + rgw md log max shards: 4 + rgw data log num shards: 4 + rgw sync obj etag verify: true + rgw: + compression type: random + rgw-multisite-tests: + branch: pacific # run pacific branch of tests + repo: https://github.com/ceph/ceph.git diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/realm.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/realm.yaml new file mode 100644 index 000000000..86fc0732f --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/realm.yaml @@ -0,0 +1,20 @@ +overrides: + rgw-multisite: + realm: + name: test-realm + is default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + endpoints: [c1.client.0] + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + rgw-multisite-tests: + args: [tests.py] diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/supported-random-distro$ b/qa/suites/rgw-multisite-upgrade/pacific-x/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/tasks.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/tasks.yaml new file mode 100644 index 000000000..843858543 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/tasks.yaml @@ -0,0 +1,18 @@ +tasks: +- install: + branch: pacific +- ceph: {cluster: c1} +- ceph: {cluster: c2} +- parallel: + - upgrade-task +- rgw: + c1.client.0: + port: 8000 + c2.client.0: + port: 8000 + c2.client.1: + port: 8001 +- rgw-multisite: +- rgw-multisite-tests: + config: + reconfigure_delay: 60 diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/.qa b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/primary.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/primary.yaml new file mode 100644 index 000000000..a29581aa6 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/primary.yaml @@ -0,0 +1,8 @@ +upgrade-task: +- install.upgrade: # leave c2 on pacific, upgrade c1 to current + c1.mon.a: + c1.mgr.x: + c1.osd.0: + c1.osd.1: + c1.osd.2: + c1.client.0: diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/secondary.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/secondary.yaml new file mode 100644 index 000000000..666986697 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/secondary.yaml @@ -0,0 +1,9 @@ +upgrade-task: +- install.upgrade: # leave c1 on pacific, upgrade c2 to current + c2.mon.a: + c2.mgr.x: + c2.osd.0: + c2.osd.1: + c2.osd.2: + c2.client.0: + c2.client.1: diff --git a/qa/suites/rgw/.qa b/qa/suites/rgw/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/+ b/qa/suites/rgw/cloud-transition/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/cloud-transition/+ diff --git a/qa/suites/rgw/cloud-transition/.qa b/qa/suites/rgw/cloud-transition/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/cluster.yaml b/qa/suites/rgw/cloud-transition/cluster.yaml new file mode 100644 index 000000000..8e0712ea5 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/cluster.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0, client.1] +#- [mon.b, osd.3, osd.4, osd.5, mgr.1, client.2, client.3] diff --git a/qa/suites/rgw/cloud-transition/ignore-pg-availability.yaml b/qa/suites/rgw/cloud-transition/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/cloud-transition/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/overrides.yaml b/qa/suites/rgw/cloud-transition/overrides.yaml new file mode 100644 index 000000000..40ca3556b --- /dev/null +++ b/qa/suites/rgw/cloud-transition/overrides.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw lc debug interval: 10 + rgw: + storage classes: LUKEWARM, FROZEN + frontend: beast diff --git a/qa/suites/rgw/cloud-transition/s3tests-branch.yaml b/qa/suites/rgw/cloud-transition/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/cloud-transition/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/supported-random-distro$ b/qa/suites/rgw/cloud-transition/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/tasks/.qa b/qa/suites/rgw/cloud-transition/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/tasks/cloud_transition_s3tests.yaml b/qa/suites/rgw/cloud-transition/tasks/cloud_transition_s3tests.yaml new file mode 100644 index 000000000..756617fba --- /dev/null +++ b/qa/suites/rgw/cloud-transition/tasks/cloud_transition_s3tests.yaml @@ -0,0 +1,61 @@ +tasks: +- install: +- ceph: +- rgw: + storage classes: LUKEWARM, FROZEN + client.0: + port: 8000 + client.1: + port: 8001 + #client.2: + #port: 8002 + #client.3: + #port: 8003 +- rgw-cloudtier: + client.0: + # cloudtier storage class params + # retain_head_object = false + cloud_storage_class: CLOUDTIER-CLIENT0 + cloud_client: client.1 + cloud_regular_storage_class: LUKEWARM + cloud_target_storage_class: FROZEN + cloud_retain_head_object: "false" + cloud_target_path: "teuthology-client0" + cloudtier_user: + # cloud-user creds to be created on cloud-client + cloud_secret: "abcefgh" + cloud_access_key: "12345678" + #client.2: + # cloudtier storage class params + # retain_head_object = true + # + # Having multiple cloudtier storage classes + # in the same task increases the transition + # time and results in spurious failures. + # Hence this is disabled until there is a + # consistent way of running the tests + # without having to depend on lc_debug_interval. + # + #cloud_storage_class: CLOUDTIER-CLIENT2 + #cloud_client: client.3 + #cloud_regular_storage_class: LUKEWARM + #cloud_retain_head_object: "true" + #cloud_target_path: "teuthology-client2" + #cloudtier_user: + # cloud-user creds to be created on cloud-client + #cloud_secret: "foobar" + #cloud_access_key: "87654321" +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + storage classes: LUKEWARM, FROZEN + extra_attrs: ["cloud_transition"] + lc_debug_interval: 10 + cloudtier_tests: True + #client.2: + #rgw_server: client.2 + #storage classes: LUKEWARM, FROZEN + #extra_attrs: ["cloud_transition"] + #lc_debug_interval: 10 + #cloudtier_tests: True diff --git a/qa/suites/rgw/crypt/% b/qa/suites/rgw/crypt/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/crypt/% diff --git a/qa/suites/rgw/crypt/.qa b/qa/suites/rgw/crypt/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/0-cluster/.qa b/qa/suites/rgw/crypt/0-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/0-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/0-cluster/fixed-1.yaml b/qa/suites/rgw/crypt/0-cluster/fixed-1.yaml new file mode 120000 index 000000000..435ea3c75 --- /dev/null +++ b/qa/suites/rgw/crypt/0-cluster/fixed-1.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/1-ceph-install/.qa b/qa/suites/rgw/crypt/1-ceph-install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/1-ceph-install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/1-ceph-install/install.yaml b/qa/suites/rgw/crypt/1-ceph-install/install.yaml new file mode 100644 index 000000000..07a08b9a6 --- /dev/null +++ b/qa/suites/rgw/crypt/1-ceph-install/install.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + wait-for-scrub: false + +tasks: +- install: +- ceph: diff --git a/qa/suites/rgw/crypt/2-kms/.qa b/qa/suites/rgw/crypt/2-kms/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/2-kms/barbican.yaml b/qa/suites/rgw/crypt/2-kms/barbican.yaml new file mode 100644 index 000000000..0c75a131c --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/barbican.yaml @@ -0,0 +1,92 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: barbican + rgw keystone barbican project: rgwcrypt + rgw keystone barbican user: rgwcrypt-user + rgw keystone barbican password: rgwcrypt-pass + rgw keystone barbican domain: Default + rgw keystone api version: 3 + rgw keystone accepted roles: admin,Member,creator + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin project: admin + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin domain: Default + rgw: + client.0: + use-keystone-role: client.0 + use-barbican-role: client.0 + +tasks: +- tox: [ client.0 ] +- keystone: + client.0: + force-branch: stable/2023.1 + services: + - name: swift + type: object-store + description: Swift Service + projects: + - name: rgwcrypt + description: Encryption Tenant + domain: default + - name: barbican + description: Barbican + domain: default + - name: s3 + description: S3 project + domain: default + users: + - name: rgwcrypt-user + password: rgwcrypt-pass + project: rgwcrypt + domain: default + - name: barbican-user + password: barbican-pass + project: barbican + domain: default + - name: s3-user + password: s3-pass + project: s3 + domain: default + roles: [ name: Member, name: creator ] + role-mappings: + - name: Member + user: rgwcrypt-user + project: rgwcrypt + - name: admin + user: barbican-user + project: barbican + - name: creator + user: s3-user + project: s3 +- barbican: + client.0: + force-branch: stable/xena + use-keystone-role: client.0 + keystone_authtoken: + auth_plugin: password + username: barbican-user + password: barbican-pass + user_domain_name: Default + rgw_user: + tenantName: rgwcrypt + username: rgwcrypt-user + password: rgwcrypt-pass + secrets: + - name: my-key-1 + base64: a2V5MS5GcWVxKzhzTGNLaGtzQkg5NGVpb1FKcFpGb2c= + tenantName: s3 + username: s3-user + password: s3-pass + - name: my-key-2 + base64: a2V5Mi5yNUNNMGFzMVdIUVZxcCt5NGVmVGlQQ1k4YWg= + tenantName: s3 + username: s3-user + password: s3-pass diff --git a/qa/suites/rgw/crypt/2-kms/kmip.yaml b/qa/suites/rgw/crypt/2-kms/kmip.yaml new file mode 100644 index 000000000..0057d954e --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/kmip.yaml @@ -0,0 +1,37 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: kmip + rgw crypt kmip ca path: /etc/ceph/kmiproot.crt + rgw crypt kmip client cert: /etc/ceph/kmip-client.crt + rgw crypt kmip client key: /etc/ceph/kmip-client.key + rgw crypt kmip kms key template: pykmip-$keyid + rgw: + client.0: + use-pykmip-role: client.0 + +tasks: +- openssl_keys: + kmiproot: + client: client.0 + cn: kmiproot + key-type: rsa:4096 + kmip-server: + client: client.0 + ca: kmiproot + kmip-client: + client: client.0 + ca: kmiproot + cn: rgw-client +- exec: + client.0: + - chmod 644 /home/ubuntu/cephtest/ca/kmip-client.key +- pykmip: + client.0: + clientca: kmiproot + servercert: kmip-server + clientcert: kmip-client + secrets: + - name: pykmip-my-key-1 + - name: pykmip-my-key-2 diff --git a/qa/suites/rgw/crypt/2-kms/testing.yaml b/qa/suites/rgw/crypt/2-kms/testing.yaml new file mode 100644 index 000000000..e02f9caad --- /dev/null +++ 
b/qa/suites/rgw/crypt/2-kms/testing.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption_keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= diff --git a/qa/suites/rgw/crypt/2-kms/vault_kv.yaml b/qa/suites/rgw/crypt/2-kms/vault_kv.yaml new file mode 100644 index 000000000..9ee9366d0 --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/vault_kv.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: vault + rgw crypt vault auth: token + rgw crypt vault secret engine: kv + rgw crypt vault prefix: /v1/kv/data + rgw: + client.0: + use-vault-role: client.0 + +tasks: +- vault: + client.0: + install_url: https://releases.hashicorp.com/vault/1.2.2/vault_1.2.2_linux_amd64.zip + install_sha256: 7725b35d9ca8be3668abe63481f0731ca4730509419b4eb29fa0b0baa4798458 + root_token: test_root_token + engine: kv + prefix: /v1/kv/data/ + secrets: + - path: my-key-1 + secret: a2V5MS5GcWVxKzhzTGNLaGtzQkg5NGVpb1FKcFpGb2c= + - path: my-key-2 + secret: a2V5Mi5yNUNNMGFzMVdIUVZxcCt5NGVmVGlQQ1k4YWg= diff --git a/qa/suites/rgw/crypt/2-kms/vault_old.yaml b/qa/suites/rgw/crypt/2-kms/vault_old.yaml new file mode 100644 index 000000000..4befc1ecf --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/vault_old.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: vault + rgw crypt vault auth: token + rgw crypt vault secret engine: transit + rgw crypt vault prefix: /v1/transit/export/encryption-key/ + rgw: + client.0: + use-vault-role: client.0 + +tasks: +- vault: + client.0: + install_url: https://releases.hashicorp.com/vault/1.2.2/vault_1.2.2_linux_amd64.zip + install_sha256: 7725b35d9ca8be3668abe63481f0731ca4730509419b4eb29fa0b0baa4798458 + root_token: test_root_token + engine: transit + flavor: old + prefix: /v1/transit/keys/ + secrets: + - path: my-key-1 + - path: my-key-2 diff --git a/qa/suites/rgw/crypt/2-kms/vault_transit.yaml b/qa/suites/rgw/crypt/2-kms/vault_transit.yaml new file mode 100644 index 000000000..d20bb52bc --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/vault_transit.yaml @@ -0,0 +1,29 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: vault + rgw crypt vault auth: token + rgw crypt vault secret engine: transit + rgw crypt vault prefix: /v1/transit/ + rgw crypt sse s3 backend: vault + rgw crypt sse s3 vault auth: token + rgw crypt sse s3 vault secret engine: transit + rgw crypt sse s3 vault prefix: /v1/transit/ + rgw: + client.0: + use-vault-role: client.0 + s3tests: + with-sse-s3: true + +tasks: +- vault: + client.0: + install_url: https://releases.hashicorp.com/vault/1.2.2/vault_1.2.2_linux_amd64.zip + install_sha256: 7725b35d9ca8be3668abe63481f0731ca4730509419b4eb29fa0b0baa4798458 + root_token: test_root_token + engine: transit + prefix: /v1/transit/keys/ + secrets: + - path: my-key-1 + - path: my-key-2 diff --git a/qa/suites/rgw/crypt/3-rgw/.qa b/qa/suites/rgw/crypt/3-rgw/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/3-rgw/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/3-rgw/rgw.yaml b/qa/suites/rgw/crypt/3-rgw/rgw.yaml new file mode 100644 index 000000000..764d216aa --- /dev/null +++ b/qa/suites/rgw/crypt/3-rgw/rgw.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + rgw crypt require ssl: false + debug rgw: 20 + rgw: + compression type: random + +tasks: +- rgw: + client.0: diff --git a/qa/suites/rgw/crypt/4-tests/+ b/qa/suites/rgw/crypt/4-tests/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/crypt/4-tests/+ diff --git a/qa/suites/rgw/crypt/4-tests/.qa b/qa/suites/rgw/crypt/4-tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/4-tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/4-tests/s3tests.yaml b/qa/suites/rgw/crypt/4-tests/s3tests.yaml new file mode 100644 index 000000000..42b254922 --- /dev/null +++ b/qa/suites/rgw/crypt/4-tests/s3tests.yaml @@ -0,0 +1,16 @@ +tasks: +- tox: [client.0] +- s3tests: + client.0: + barbican: + kms_key: my-key-1 + kms_key2: my-key-2 + vault_kv: + key_path: my-key-1 + key_path2: my-key-2 + vault_old: + key_path: my-key-1/1 + key_path2: my-key-2/1 + vault_transit: + key_path: my-key-1 + key_path2: my-key-2 diff --git a/qa/suites/rgw/crypt/ignore-pg-availability.yaml b/qa/suites/rgw/crypt/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/crypt/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/s3tests-branch.yaml b/qa/suites/rgw/crypt/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/crypt/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/ubuntu_latest.yaml b/qa/suites/rgw/crypt/ubuntu_latest.yaml new file mode 120000 index 000000000..fe01dafd4 --- /dev/null +++ b/qa/suites/rgw/crypt/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/+ b/qa/suites/rgw/dbstore/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/dbstore/+ diff --git a/qa/suites/rgw/dbstore/.qa b/qa/suites/rgw/dbstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/dbstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/cluster.yaml b/qa/suites/rgw/dbstore/cluster.yaml new file mode 100644 index 000000000..496b51ba7 --- /dev/null +++ b/qa/suites/rgw/dbstore/cluster.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0] + diff --git a/qa/suites/rgw/dbstore/ignore-pg-availability.yaml b/qa/suites/rgw/dbstore/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/dbstore/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/overrides.yaml b/qa/suites/rgw/dbstore/overrides.yaml new file mode 100644 index 000000000..df4aaa9af --- /dev/null +++ b/qa/suites/rgw/dbstore/overrides.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt require ssl: false + rgw backend store: dbstore + rgw: + frontend: beast diff --git a/qa/suites/rgw/dbstore/s3tests-branch.yaml b/qa/suites/rgw/dbstore/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/dbstore/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/supported-random-distro$ b/qa/suites/rgw/dbstore/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/dbstore/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/tasks/.qa b/qa/suites/rgw/dbstore/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/dbstore/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/tasks/rgw_s3tests.yaml b/qa/suites/rgw/dbstore/tasks/rgw_s3tests.yaml new file mode 100644 index 000000000..daab5581d --- /dev/null +++ b/qa/suites/rgw/dbstore/tasks/rgw_s3tests.yaml @@ -0,0 +1,16 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- exec: + client.0: + - sudo chmod 0777 /var/lib/ceph + - sudo chmod 0777 /var/lib/ceph/radosgw + - sudo chmod 0777 /var/lib/ceph/radosgw/dbstore-default_ns.db +- s3tests: + client.0: + dbstore_tests: True + rgw_server: client.0 + extra_attrs: ["not fails_on_rgw","not fails_on_dbstore"] + diff --git a/qa/suites/rgw/hadoop-s3a/% b/qa/suites/rgw/hadoop-s3a/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/% diff --git a/qa/suites/rgw/hadoop-s3a/.qa b/qa/suites/rgw/hadoop-s3a/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/clusters/.qa b/qa/suites/rgw/hadoop-s3a/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml b/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/.qa b/qa/suites/rgw/hadoop-s3a/hadoop/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/hadoop/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml @@ -0,0 +1 @@ + diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml new file mode 100644 index 000000000..d017b756b --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml @@ -0,0 +1,3 @@ +overrides: + s3a-hadoop: + hadoop-version: '3.2.0' diff --git a/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml b/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/overrides.yaml b/qa/suites/rgw/hadoop-s3a/overrides.yaml new file mode 100644 index 000000000..d52080bb5 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/overrides.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph diff --git a/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml b/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml new file mode 100644 index 000000000..ed077a89f --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: +- ssh-keys: +- dnsmasq: + client.0: [s3.] +- rgw: + client.0: + dns-name: s3. +- s3a-hadoop: + role: client.0 diff --git a/qa/suites/rgw/lifecycle/+ b/qa/suites/rgw/lifecycle/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/lifecycle/+ diff --git a/qa/suites/rgw/lifecycle/.qa b/qa/suites/rgw/lifecycle/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/lifecycle/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/cluster.yaml b/qa/suites/rgw/lifecycle/cluster.yaml new file mode 100644 index 000000000..0eab7ebad --- /dev/null +++ b/qa/suites/rgw/lifecycle/cluster.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB +overrides: + rgw: + frontend: beast
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/ignore-pg-availability.yaml b/qa/suites/rgw/lifecycle/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/lifecycle/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/overrides.yaml b/qa/suites/rgw/lifecycle/overrides.yaml new file mode 100644 index 000000000..83722d312 --- /dev/null +++ b/qa/suites/rgw/lifecycle/overrides.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw lc debug interval: 10 + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/lifecycle/s3tests-branch.yaml b/qa/suites/rgw/lifecycle/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/lifecycle/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/supported-random-distro$ b/qa/suites/rgw/lifecycle/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/lifecycle/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/tasks/.qa b/qa/suites/rgw/lifecycle/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/lifecycle/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/tasks/rgw_s3tests.yaml b/qa/suites/rgw/lifecycle/tasks/rgw_s3tests.yaml new file mode 100644 index 000000000..a8675dcf1 --- /dev/null +++ b/qa/suites/rgw/lifecycle/tasks/rgw_s3tests.yaml @@ -0,0 +1,20 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + storage classes: LUKEWARM, FROZEN + extra_attrs: ["lifecycle"] + lc_debug_interval: 10 +overrides: + ceph: + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + storage classes: LUKEWARM, FROZEN + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/multifs/% b/qa/suites/rgw/multifs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/multifs/% diff --git a/qa/suites/rgw/multifs/.qa b/qa/suites/rgw/multifs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multifs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/clusters/.qa b/qa/suites/rgw/multifs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multifs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/clusters/fixed-2.yaml b/qa/suites/rgw/multifs/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/multifs/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/frontend b/qa/suites/rgw/multifs/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/multifs/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/ignore-pg-availability.yaml b/qa/suites/rgw/multifs/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/multifs/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/objectstore b/qa/suites/rgw/multifs/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/multifs/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/overrides.yaml b/qa/suites/rgw/multifs/overrides.yaml new file mode 100644 index 000000000..1cb489072 --- /dev/null +++ b/qa/suites/rgw/multifs/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/multifs/rgw_pool_type b/qa/suites/rgw/multifs/rgw_pool_type new file mode 120000 index 000000000..3bbd28e96 --- /dev/null +++ b/qa/suites/rgw/multifs/rgw_pool_type @@ -0,0 +1 @@ +.qa/rgw_pool_type
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/s3tests-branch.yaml b/qa/suites/rgw/multifs/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/multifs/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/tasks/.qa b/qa/suites/rgw/multifs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml b/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml new file mode 100644 index 000000000..e07c8b5cc --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_bucket_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml b/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml new file mode 100644 index 000000000..bac4f4016 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_multipart_upload.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/multifs/tasks/rgw_ragweed.yaml b/qa/suites/rgw/multifs/tasks/rgw_ragweed.yaml new file mode 100644 index 000000000..ab9d21fca --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_ragweed.yaml @@ -0,0 +1,20 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- ragweed: + client.0: + default-branch: ceph-reef + rgw_server: client.0 + stages: prepare +- ragweed: + client.0: + default-branch: ceph-reef + rgw_server: client.0 + stages: check +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml b/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml new file mode 100644 index 000000000..7ff143243 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml b/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml new file mode 100644 index 000000000..92c63d2e8 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_user_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/multifs/ubuntu_latest.yaml b/qa/suites/rgw/multifs/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/multifs/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/% b/qa/suites/rgw/multisite/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/multisite/% diff --git a/qa/suites/rgw/multisite/.qa b/qa/suites/rgw/multisite/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multisite/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/clusters.yaml b/qa/suites/rgw/multisite/clusters.yaml new file mode 100644 index 000000000..536ef7ca4 --- /dev/null +++ b/qa/suites/rgw/multisite/clusters.yaml @@ -0,0 +1,3 @@ +roles: +- [c1.mon.a, c1.mgr.x, c1.osd.0, c1.osd.1, c1.osd.2, c1.client.0, c1.client.1] +- [c2.mon.a, c2.mgr.x, c2.osd.0, c2.osd.1, c2.osd.2, c2.client.0, c2.client.1] diff --git a/qa/suites/rgw/multisite/frontend b/qa/suites/rgw/multisite/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/multisite/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/ignore-pg-availability.yaml b/qa/suites/rgw/multisite/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/multisite/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/notify.yaml b/qa/suites/rgw/multisite/notify.yaml new file mode 100644 index 000000000..00e0bb762 --- /dev/null +++ b/qa/suites/rgw/multisite/notify.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client.0: # disable notifications on one zone per cluster + rgw data notify interval msec: 0 diff --git a/qa/suites/rgw/multisite/omap_limits.yaml b/qa/suites/rgw/multisite/omap_limits.yaml new file mode 100644 index 000000000..9ad1ec9ee --- /dev/null +++ b/qa/suites/rgw/multisite/omap_limits.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + osd: + # remove the threshold so that test_bucket_index_log_trim() will test DeleteRange + rocksdb delete range threshold: 0 + # instead of expanding the matrix, run each osd with a different omap limit + osd.0: + osd_max_omap_entries_per_request: 10 + osd.1: + osd_max_omap_entries_per_request: 1000 + osd.2: + osd_max_omap_entries_per_request: 10000 diff --git a/qa/suites/rgw/multisite/overrides.yaml b/qa/suites/rgw/multisite/overrides.yaml new file mode 100644 index 000000000..76c14eb91 --- /dev/null +++ b/qa/suites/rgw/multisite/overrides.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + debug rgw sync: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= + rgw crypt require ssl: false + rgw sync log trim interval: 0 + rgw curl low speed time: 300 + rgw md log max shards: 4 + rgw data log num shards: 4 + rgw sync obj etag verify: true + rgw sync meta inject err probability: 0.1 + rgw sync data inject err probability: 0.1 + rgw: + compression type: random diff --git a/qa/suites/rgw/multisite/realms/.qa b/qa/suites/rgw/multisite/realms/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multisite/realms/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/realms/three-zones.yaml b/qa/suites/rgw/multisite/realms/three-zones.yaml new file mode 100644 index 000000000..95318b0f8 --- /dev/null +++ b/qa/suites/rgw/multisite/realms/three-zones.yaml @@ -0,0 +1,23 @@ +overrides: + rgw-multisite: + realm: + name: test-realm + is default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + endpoints: [c1.client.0] + enabled_features: ['resharding'] + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + - name: test-zone3 + endpoints: [c1.client.1] + rgw-multisite-tests: + args: [tests.py] diff --git a/qa/suites/rgw/multisite/realms/two-zonegroup.yaml b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml new file mode 100644 index 000000000..02710a7d9 --- /dev/null +++ b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml @@ -0,0 +1,31 @@ +overrides: + rgw-multisite: + realm: + name: test-realm + is default: true + zonegroups: + - name: a + is_master: true + is_default: true + endpoints: [c1.client.0] + enabled_features: ['resharding'] + zones: + - name: a1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: a2 + endpoints: [c1.client.1] + - name: b + is_default: true + endpoints: [c2.client.0] + enabled_features: ['resharding'] + zones: + - name: b1 + is_master: true + is_default: true + endpoints: [c2.client.0] + - name: b2 + endpoints: [c2.client.1] + rgw-multisite-tests: + args: [tests.py] diff --git a/qa/suites/rgw/multisite/supported-random-distro$ b/qa/suites/rgw/multisite/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/multisite/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/tasks/.qa b/qa/suites/rgw/multisite/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multisite/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/tasks/test_multi.yaml b/qa/suites/rgw/multisite/tasks/test_multi.yaml new file mode 100644 index 000000000..1a65a67b5 --- /dev/null +++ b/qa/suites/rgw/multisite/tasks/test_multi.yaml @@ -0,0 +1,17 @@ +tasks: +- install: +- ceph: {cluster: c1} +- ceph: {cluster: c2} +- rgw: + c1.client.0: + port: 8000 + c1.client.1: + port: 8001 + c2.client.0: + port: 8000 + c2.client.1: + port: 8001 +- rgw-multisite: +- rgw-multisite-tests: + config: + reconfigure_delay: 60 diff --git a/qa/suites/rgw/multisite/valgrind.yaml.disabled b/qa/suites/rgw/multisite/valgrind.yaml.disabled new file mode 100644 index 000000000..36e20f02c --- /dev/null +++ b/qa/suites/rgw/multisite/valgrind.yaml.disabled @@ -0,0 +1,20 @@ +overrides: + install: + ceph: + ceph: + conf: + global: + osd heartbeat grace: 40 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + rgw: + c1.client.0: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + c1.client.1: + valgrind: [--tool=memcheck, --max-threads=1024] + c2.client.0: + valgrind: [--tool=memcheck, --max-threads=1024] + c2.client.1: + valgrind: [--tool=memcheck, --max-threads=1024] diff --git a/qa/suites/rgw/notifications/% b/qa/suites/rgw/notifications/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/notifications/% diff --git a/qa/suites/rgw/notifications/.qa b/qa/suites/rgw/notifications/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/notifications/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/beast.yaml b/qa/suites/rgw/notifications/beast.yaml new file mode 120000 index 000000000..09ced62c4 --- /dev/null +++ b/qa/suites/rgw/notifications/beast.yaml @@ -0,0 +1 @@ +.qa/rgw_frontend/beast.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/bluestore-bitmap.yaml b/qa/suites/rgw/notifications/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/rgw/notifications/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/fixed-2.yaml b/qa/suites/rgw/notifications/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/notifications/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/ignore-pg-availability.yaml b/qa/suites/rgw/notifications/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/notifications/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/overrides.yaml b/qa/suites/rgw/notifications/overrides.yaml new file mode 100644 index 000000000..1cb489072 --- /dev/null +++ b/qa/suites/rgw/notifications/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/notifications/supported-all-distro$/$ b/qa/suites/rgw/notifications/supported-all-distro$/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/notifications/supported-all-distro$/$ diff --git a/qa/suites/rgw/notifications/supported-all-distro$/.qa b/qa/suites/rgw/notifications/supported-all-distro$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/notifications/supported-all-distro$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/supported-all-distro$/centos_8.yaml b/qa/suites/rgw/notifications/supported-all-distro$/centos_8.yaml new file mode 120000 index 000000000..c23fd0540 --- /dev/null +++ b/qa/suites/rgw/notifications/supported-all-distro$/centos_8.yaml @@ -0,0 +1 @@ +../.qa/distros/supported-all-distro/centos_8.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/tasks/+ b/qa/suites/rgw/notifications/tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/+ diff --git a/qa/suites/rgw/notifications/tasks/.qa b/qa/suites/rgw/notifications/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/tasks/0-install.yaml b/qa/suites/rgw/notifications/tasks/0-install.yaml new file mode 100644 index 000000000..013ccbd26 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/0-install.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: +- openssl_keys: +- rgw: + client.0: + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/notifications/tasks/test_amqp.yaml b/qa/suites/rgw/notifications/tasks/test_amqp.yaml new file mode 100644 index 000000000..6807cfb65 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/test_amqp.yaml @@ -0,0 +1,7 @@ +tasks: +- rabbitmq: + client.0: +- notification-tests: + client.0: + extra_attr: ["amqp_test"] + rgw_server: client.0 diff --git a/qa/suites/rgw/notifications/tasks/test_kafka.yaml b/qa/suites/rgw/notifications/tasks/test_kafka.yaml new file mode 100644 index 000000000..ae647df38 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/test_kafka.yaml @@ -0,0 +1,8 @@ +tasks: +- kafka: + client.0: + kafka_version: 2.6.0 +- notification-tests: + client.0: + extra_attr: ["kafka_test"] + rgw_server: client.0 diff --git a/qa/suites/rgw/notifications/tasks/test_others.yaml b/qa/suites/rgw/notifications/tasks/test_others.yaml new file mode 100644 index 000000000..793f6f430 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/test_others.yaml @@ -0,0 +1,4 @@ +tasks: +- notification-tests: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/service-token/% b/qa/suites/rgw/service-token/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/service-token/% diff --git a/qa/suites/rgw/service-token/.qa b/qa/suites/rgw/service-token/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/service-token/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/clusters/.qa b/qa/suites/rgw/service-token/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/service-token/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/clusters/fixed-1.yaml b/qa/suites/rgw/service-token/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/rgw/service-token/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/frontend b/qa/suites/rgw/service-token/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/service-token/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/ignore-pg-availability.yaml b/qa/suites/rgw/service-token/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/service-token/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/overrides.yaml b/qa/suites/rgw/service-token/overrides.yaml new file mode 100644 index 000000000..c727ec3fd --- /dev/null +++ b/qa/suites/rgw/service-token/overrides.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw keystone api version: 3 + rgw keystone url: http://localhost:5000 + rgw keystone accepted roles: admin,Member + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin domain: Default + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin project: admin + rgw keystone service token enabled: true + rgw keystone service token accepted roles: admin + rgw keystone expired token cache expiration: 10 diff --git a/qa/suites/rgw/service-token/tasks/.qa b/qa/suites/rgw/service-token/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/service-token/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/tasks/service-token.yaml b/qa/suites/rgw/service-token/tasks/service-token.yaml new file mode 100644 index 000000000..8aef1985b --- /dev/null +++ b/qa/suites/rgw/service-token/tasks/service-token.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: +- rgw: + client.0: + port: 8000 +- workunit: + basedir: qa/workunits/rgw + clients: + client.0: + - keystone-service-token.sh diff --git a/qa/suites/rgw/service-token/ubuntu_latest.yaml b/qa/suites/rgw/service-token/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/service-token/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/% b/qa/suites/rgw/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/singleton/% diff --git a/qa/suites/rgw/singleton/.qa b/qa/suites/rgw/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/all/.qa b/qa/suites/rgw/singleton/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/singleton/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/all/radosgw-admin.yaml b/qa/suites/rgw/singleton/all/radosgw-admin.yaml new file mode 100644 index 000000000..010a0647c --- /dev/null +++ b/qa/suites/rgw/singleton/all/radosgw-admin.yaml @@ -0,0 +1,21 @@ +roles: +- [mon.a, osd.0] +- [mgr.x, client.0, osd.1, osd.2, osd.3] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + conf: + client: + debug ms: 1 + rgw gc obj min wait: 15 + osd: + debug ms: 1 + debug objclass : 20 +- rgw: + client.0: +- radosgw-admin: +- radosgw-admin-rest: diff --git a/qa/suites/rgw/singleton/frontend b/qa/suites/rgw/singleton/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/singleton/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/ignore-pg-availability.yaml b/qa/suites/rgw/singleton/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/singleton/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/objectstore b/qa/suites/rgw/singleton/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/singleton/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/overrides.yaml b/qa/suites/rgw/singleton/overrides.yaml new file mode 100644 index 000000000..d2aea790a --- /dev/null +++ b/qa/suites/rgw/singleton/overrides.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 diff --git a/qa/suites/rgw/singleton/rgw_pool_type b/qa/suites/rgw/singleton/rgw_pool_type new file mode 120000 index 000000000..3bbd28e96 --- /dev/null +++ b/qa/suites/rgw/singleton/rgw_pool_type @@ -0,0 +1 @@ +.qa/rgw_pool_type
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/supported-random-distro$ b/qa/suites/rgw/singleton/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/singleton/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/sts/% b/qa/suites/rgw/sts/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/sts/% diff --git a/qa/suites/rgw/sts/.qa b/qa/suites/rgw/sts/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/sts/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/sts/cluster.yaml b/qa/suites/rgw/sts/cluster.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/sts/cluster.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/ignore-pg-availability.yaml b/qa/suites/rgw/sts/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/sts/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/objectstore.yaml b/qa/suites/rgw/sts/objectstore.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/rgw/sts/objectstore.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/overrides.yaml b/qa/suites/rgw/sts/overrides.yaml new file mode 100644 index 000000000..1cb489072 --- /dev/null +++ b/qa/suites/rgw/sts/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/sts/pool-type.yaml b/qa/suites/rgw/sts/pool-type.yaml new file mode 120000 index 000000000..697b33b20 --- /dev/null +++ b/qa/suites/rgw/sts/pool-type.yaml @@ -0,0 +1 @@ +.qa/rgw_pool_type/replicated.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/rgw_frontend b/qa/suites/rgw/sts/rgw_frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/sts/rgw_frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/sts/s3tests-branch.yaml b/qa/suites/rgw/sts/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/sts/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/supported-random-distro$ b/qa/suites/rgw/sts/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/sts/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/sts/tasks/+ b/qa/suites/rgw/sts/tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/sts/tasks/+ diff --git a/qa/suites/rgw/sts/tasks/.qa b/qa/suites/rgw/sts/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/sts/tasks/0-install.yaml b/qa/suites/rgw/sts/tasks/0-install.yaml new file mode 100644 index 000000000..013ccbd26 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/0-install.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: +- openssl_keys: +- rgw: + client.0: + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/sts/tasks/first.yaml b/qa/suites/rgw/sts/tasks/first.yaml new file mode 100644 index 000000000..db785ba73 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/first.yaml @@ -0,0 +1,16 @@ +tasks: +- tox: [ client.0 ] +- keycloak: + client.0: + keycloak_version: 11.0.0 +- s3tests: + client.0: + sts_tests: True + rgw_server: client.0 + extra_attrs: ['webidentity_test'] +overrides: + ceph: + conf: + client: + rgw sts key: abcdefghijklmnop + rgw s3 auth use sts: true diff --git a/qa/suites/rgw/sts/tasks/ststests.yaml b/qa/suites/rgw/sts/tasks/ststests.yaml new file mode 100644 index 000000000..175071b89 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/ststests.yaml @@ -0,0 +1,12 @@ +tasks: +- s3tests: + client.0: + sts_tests: True + extra_attrs: ["test_of_sts"] + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw sts key: abcdefghijklmnop + rgw s3 auth use sts: true diff --git a/qa/suites/rgw/tempest/% b/qa/suites/rgw/tempest/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/tempest/% diff --git a/qa/suites/rgw/tempest/.qa b/qa/suites/rgw/tempest/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tempest/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/clusters/.qa b/qa/suites/rgw/tempest/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tempest/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/clusters/fixed-1.yaml b/qa/suites/rgw/tempest/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/rgw/tempest/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/frontend b/qa/suites/rgw/tempest/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/tempest/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/ignore-pg-availability.yaml b/qa/suites/rgw/tempest/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/tempest/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/overrides.yaml b/qa/suites/rgw/tempest/overrides.yaml new file mode 100644 index 000000000..e7a292ffd --- /dev/null +++ b/qa/suites/rgw/tempest/overrides.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 diff --git a/qa/suites/rgw/tempest/tasks/.qa b/qa/suites/rgw/tempest/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/tasks/rgw_tempest.yaml b/qa/suites/rgw/tempest/tasks/rgw_tempest.yaml new file mode 100644 index 000000000..ad9dc9dd5 --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/rgw_tempest.yaml @@ -0,0 +1,72 @@ +tasks: +- install: +- ceph: +- tox: [ client.0 ] +- keystone: + client.0: + force-branch: stable/2023.1 + services: + - name: swift + type: object-store + description: Swift Service +- rgw: + client.0: + frontend_prefix: /swift + use-keystone-role: client.0 +- tempest: + client.0: + sha1: 34.1.0 + force-branch: master + use-keystone-role: client.0 + auth: + admin_username: admin + admin_project_name: admin + admin_password: ADMIN + admin_domain_name: Default + tempest_roles: admin + identity: + uri: http://{keystone_public_host}:{keystone_public_port}/v2.0/ + uri_v3: http://{keystone_public_host}:{keystone_public_port}/v3/ + auth_version: v3 + admin_role: admin + default_domain_name: Default + object-storage: + reseller_admin_role: admin + object-storage-feature-enabled: + container_sync: false + discoverability: true + # TODO(tobias-urdin): Use sha256 when supported in RadosGW + tempurl_digest_hashlib: sha1 + blocklist: + - .*test_account_quotas_negative.AccountQuotasNegativeTest.test_user_modify_quota + - .*test_container_acl_negative.ObjectACLsNegativeTest.* + - .*test_container_services_negative.ContainerNegativeTest.test_create_container_metadata_.* + - .*test_container_staticweb.StaticWebTest.test_web_index + - .*test_container_staticweb.StaticWebTest.test_web_listing_css + - .*test_container_synchronization.* + - .*test_object_services.PublicObjectTest.test_access_public_container_object_without_using_creds + - .*test_object_services.ObjectTest.test_create_object_with_transfer_encoding + - .*test_container_services.ContainerTest.test_create_container_with_remove_metadata_key + - .*test_container_services.ContainerTest.test_create_container_with_remove_metadata_value + - .*test_object_expiry.ObjectExpiryTest.test_get_object_after_expiry_time + - .*test_object_expiry.ObjectExpiryTest.test_get_object_at_expiry_time + - .*test_account_services.AccountTest.test_list_no_account_metadata + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw keystone api version: 3 + rgw keystone accepted roles: admin,member + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin domain: Default + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin project: admin diff --git a/qa/suites/rgw/tempest/ubuntu_latest.yaml b/qa/suites/rgw/tempest/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/tempest/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/% b/qa/suites/rgw/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/thrash/% diff --git a/qa/suites/rgw/thrash/.qa b/qa/suites/rgw/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/clusters/.qa b/qa/suites/rgw/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/clusters/fixed-2.yaml b/qa/suites/rgw/thrash/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/frontend b/qa/suites/rgw/thrash/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/thrash/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/ignore-pg-availability.yaml b/qa/suites/rgw/thrash/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/thrash/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/install.yaml b/qa/suites/rgw/thrash/install.yaml new file mode 100644 index 000000000..84a1d70cf --- /dev/null +++ b/qa/suites/rgw/thrash/install.yaml @@ -0,0 +1,5 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] + diff --git a/qa/suites/rgw/thrash/objectstore b/qa/suites/rgw/thrash/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/s3tests-branch.yaml b/qa/suites/rgw/thrash/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/thrash/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/thrasher/.qa b/qa/suites/rgw/thrash/thrasher/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/thrasher/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/thrasher/default.yaml b/qa/suites/rgw/thrash/thrasher/default.yaml new file mode 100644 index 000000000..1f35f1bc9 --- /dev/null +++ b/qa/suites/rgw/thrash/thrasher/default.yaml @@ -0,0 +1,9 @@ +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + op_delay: 30 + chance_test_min_size: 0 + ceph_objectstore_tool: false diff --git a/qa/suites/rgw/thrash/thrashosds-health.yaml b/qa/suites/rgw/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rgw/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/ubuntu_latest.yaml b/qa/suites/rgw/thrash/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/thrash/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/workload/.qa b/qa/suites/rgw/thrash/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/workload/rgw_bucket_quota.yaml b/qa/suites/rgw/thrash/workload/rgw_bucket_quota.yaml new file mode 100644 index 000000000..bc441ff32 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_bucket_quota.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/s3_bucket_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/thrash/workload/rgw_multipart_upload.yaml b/qa/suites/rgw/thrash/workload/rgw_multipart_upload.yaml new file mode 100644 index 000000000..1b98f2625 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_multipart_upload.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/s3_multipart_upload.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/thrash/workload/rgw_s3tests.yaml b/qa/suites/rgw/thrash/workload/rgw_s3tests.yaml new file mode 100644 index 000000000..6a59dc905 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_s3tests.yaml @@ -0,0 +1,13 @@ +tasks: +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false diff --git a/qa/suites/rgw/thrash/workload/rgw_user_quota.yaml b/qa/suites/rgw/thrash/workload/rgw_user_quota.yaml new file mode 100644 index 000000000..75ba3d474 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_user_quota.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/s3_user_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/tools/+ b/qa/suites/rgw/tools/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/tools/+ diff --git a/qa/suites/rgw/tools/.qa b/qa/suites/rgw/tools/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tools/.qa @@ -0,0 +1 @@ +../.qa/
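Taken together, the rgw/thrash fragments compose into individual jobs. As an illustrative sketch only (not a file added by this change), roughly what one job looks like after install.yaml, thrasher/default.yaml and the rgw_s3tests workload are combined, assuming the usual concatenation of tasks and merge of overrides performed by the suite tooling:

tasks:
- install:
- ceph:
- rgw: [client.0]
- thrashosds:
    timeout: 1200
    chance_pgnum_grow: 1
    chance_pgnum_shrink: 1
    chance_pgpnum_fix: 1
    op_delay: 30
    chance_test_min_size: 0
    ceph_objectstore_tool: false
- tox: [client.0]
- s3tests:
    client.0:
      rgw_server: client.0
# overrides from the workload (rgw lc debug interval, crypt settings) and from
# thrashosds-health.yaml are merged in as well; omitted here for brevity.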
\ No newline at end of file diff --git a/qa/suites/rgw/tools/centos_latest.yaml b/qa/suites/rgw/tools/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rgw/tools/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tools/cluster.yaml b/qa/suites/rgw/tools/cluster.yaml new file mode 100644 index 000000000..0eab7ebad --- /dev/null +++ b/qa/suites/rgw/tools/cluster.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB +overrides: + rgw: + frontend: beast
\ No newline at end of file diff --git a/qa/suites/rgw/tools/ignore-pg-availability.yaml b/qa/suites/rgw/tools/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/tools/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tools/tasks.yaml b/qa/suites/rgw/tools/tasks.yaml new file mode 100644 index 000000000..acceb21c8 --- /dev/null +++ b/qa/suites/rgw/tools/tasks.yaml @@ -0,0 +1,19 @@ +tasks: +- install: +- ceph: +- rgw: + client.0: + # force rgw_dns_name to be set with the fully qualified host name; + # it will be appended to the empty string + dns-name: '' +- workunit: + clients: + client.0: + - rgw/test_rgw_orphan_list.sh +overrides: + ceph: + conf: + client: + debug rgw: 20 + debug ms: 1 + rgw enable static website: false diff --git a/qa/suites/rgw/upgrade/% b/qa/suites/rgw/upgrade/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/upgrade/% diff --git a/qa/suites/rgw/upgrade/.qa b/qa/suites/rgw/upgrade/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/.qa @@ -0,0 +1 @@ +../.qa
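The empty dns-name in the rgw/tools tasks above works because the rgw task appends the remote's fully qualified hostname to whatever value is given. A minimal sketch, assuming the same option, of the non-empty form used later in this change by the website suite, where a prefix such as s3. yields an rgw_dns_name of s3.<fqdn>:

tasks:
- rgw:
    client.0:
      # the client's fully qualified hostname is appended to the prefix
      dns-name: s3.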
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/.qa b/qa/suites/rgw/upgrade/1-install/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/% b/qa/suites/rgw/upgrade/1-install/pacific/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/% diff --git a/qa/suites/rgw/upgrade/1-install/pacific/.qa b/qa/suites/rgw/upgrade/1-install/pacific/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/distro$/.qa b/qa/suites/rgw/upgrade/1-install/pacific/distro$/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/distro$/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/distro$/centos_8.stream.yaml b/qa/suites/rgw/upgrade/1-install/pacific/distro$/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/distro$/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/distro$/ubuntu_20.04.yaml b/qa/suites/rgw/upgrade/1-install/pacific/distro$/ubuntu_20.04.yaml new file mode 120000 index 000000000..29fb99ae2 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/distro$/ubuntu_20.04.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/install.yaml b/qa/suites/rgw/upgrade/1-install/pacific/install.yaml new file mode 100644 index 000000000..ec89e7921 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/install.yaml @@ -0,0 +1,5 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume diff --git a/qa/suites/rgw/upgrade/1-install/pacific/overrides.yaml b/qa/suites/rgw/upgrade/1-install/pacific/overrides.yaml new file mode 100644 index 000000000..d882523e0 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/overrides.yaml @@ -0,0 +1,3 @@ +overrides: + ragweed: + default-branch: ceph-reef # ceph-pacific doesn't have tox, but tests are the same diff --git a/qa/suites/rgw/upgrade/1-install/quincy/% b/qa/suites/rgw/upgrade/1-install/quincy/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/% diff --git a/qa/suites/rgw/upgrade/1-install/quincy/.qa b/qa/suites/rgw/upgrade/1-install/quincy/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/distro$/.qa b/qa/suites/rgw/upgrade/1-install/quincy/distro$/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/distro$/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/distro$/centos_latest.yaml b/qa/suites/rgw/upgrade/1-install/quincy/distro$/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/distro$/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/distro$/ubuntu_20.04.yaml b/qa/suites/rgw/upgrade/1-install/quincy/distro$/ubuntu_20.04.yaml new file mode 120000 index 000000000..29fb99ae2 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/distro$/ubuntu_20.04.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/install.yaml b/qa/suites/rgw/upgrade/1-install/quincy/install.yaml new file mode 100644 index 000000000..a96f09ee7 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: + branch: quincy diff --git a/qa/suites/rgw/upgrade/1-install/quincy/overrides.yaml b/qa/suites/rgw/upgrade/1-install/quincy/overrides.yaml new file mode 100644 index 000000000..02508d329 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/overrides.yaml @@ -0,0 +1,3 @@ +overrides: + ragweed: + default-branch: ceph-reef # ceph-quincy doesn't have tox, but tests are the same diff --git a/qa/suites/rgw/upgrade/2-setup.yaml b/qa/suites/rgw/upgrade/2-setup.yaml new file mode 100644 index 000000000..f6627398e --- /dev/null +++ b/qa/suites/rgw/upgrade/2-setup.yaml @@ -0,0 +1,8 @@ +tasks: +- ceph: +- rgw: [client.0, client.1] +- tox: [client.0, client.1] +- print: "installing upgraded packages" +- install.upgrade: + mon.a: + mon.b: diff --git a/qa/suites/rgw/upgrade/3-upgrade-sequence/osds-then-rgws.yaml b/qa/suites/rgw/upgrade/3-upgrade-sequence/osds-then-rgws.yaml new file mode 100644 index 000000000..22bfbc3d2 --- /dev/null +++ b/qa/suites/rgw/upgrade/3-upgrade-sequence/osds-then-rgws.yaml @@ -0,0 +1,27 @@ +tasks: +- print: "ragweed prepare before upgrade" +- ragweed: + client.0: + stages: prepare +- print: "restarting upgraded osds" +- ceph.restart: + daemons: [osd.0, osd.2] +- ceph.restart: + daemons: [osd.1, osd.3] +- ceph.restart: + daemons: [osd.4, osd.6] +- ceph.restart: + daemons: [osd.5, osd.7] +- print: "ragweed check/prepare after osd upgrade" +- ragweed: + client.0: + stages: check + client.1: + stages: prepare +- print: "restarting upgraded rgw" +- ceph.restart: + daemons: [rgw.*] +- print: "ragweed check after rgw upgrade" +- ragweed: + client.1: + stages: check diff --git a/qa/suites/rgw/upgrade/3-upgrade-sequence/rgws-then-osds.yaml b/qa/suites/rgw/upgrade/3-upgrade-sequence/rgws-then-osds.yaml new file mode 100644 index 000000000..662750bee --- /dev/null +++ b/qa/suites/rgw/upgrade/3-upgrade-sequence/rgws-then-osds.yaml @@ -0,0 +1,27 @@ +tasks: +- print: "ragweed prepare before upgrade" +- ragweed: + client.0: + stages: prepare +- print: "restarting upgraded rgws" +- ceph.restart: + daemons: [rgw.*] +- print: "ragweed check/prepare after rgw upgrade" +- ragweed: + client.0: + stages: check + client.1: + stages: prepare +- print: "restarting upgraded osds" +- ceph.restart: + daemons: [osd.0, osd.2] +- ceph.restart: + daemons: [osd.1, osd.3] +- ceph.restart: + daemons: [osd.4, osd.6] +- ceph.restart: + daemons: [osd.5, osd.7] +- print: "ragweed check after osd upgrade" +- ragweed: + client.1: + stages: check diff --git a/qa/suites/rgw/upgrade/cluster.yaml b/qa/suites/rgw/upgrade/cluster.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/upgrade/cluster.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/frontend b/qa/suites/rgw/upgrade/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/upgrade/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/ignore-pg-availability.yaml b/qa/suites/rgw/upgrade/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/upgrade/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/objectstore b/qa/suites/rgw/upgrade/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/upgrade/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/overrides.yaml b/qa/suites/rgw/upgrade/overrides.yaml new file mode 100644 index 000000000..00592d459 --- /dev/null +++ b/qa/suites/rgw/upgrade/overrides.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + - \(OSD_DOWN\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + - slow request + - failed to encode map + wait-for-scrub: false + conf: + mon: + mon warn on osd down out interval zero: false + osd: + osd min pg log entries: 1 + osd max pg log entries: 2 + ragweed: + rgw_server: client.0 diff --git a/qa/suites/rgw/verify/% b/qa/suites/rgw/verify/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/% diff --git a/qa/suites/rgw/verify/.qa b/qa/suites/rgw/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/.qa @@ -0,0 +1 @@ +../.qa/
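As elsewhere, the numbered rgw/upgrade fragments concatenate into a single job. An illustrative, condensed sketch of the resulting flow for a pacific starting point with the osds-then-rgws sequence (the real fragment restarts the OSDs in pairs and interleaves print tasks):

tasks:
- install:
    branch: pacific
    exclude_packages:
    - ceph-volume
- ceph:
- rgw: [client.0, client.1]
- tox: [client.0, client.1]
- install.upgrade:
    mon.a:
    mon.b:
- ragweed:
    client.0:
      stages: prepare
- ceph.restart:
    # condensed; the fragment restarts [osd.0, osd.2], [osd.1, osd.3], etc.
    daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6, osd.7]
- ragweed:
    client.0:
      stages: check
    client.1:
      stages: prepare
- ceph.restart:
    daemons: [rgw.*]
- ragweed:
    client.1:
      stages: check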
\ No newline at end of file diff --git a/qa/suites/rgw/verify/0-install.yaml b/qa/suites/rgw/verify/0-install.yaml new file mode 100644 index 000000000..252dd1f7f --- /dev/null +++ b/qa/suites/rgw/verify/0-install.yaml @@ -0,0 +1,20 @@ +tasks: +- install: + # extra packages added for the rgw-datacache task + extra_system_packages: + deb: ['s3cmd'] + rpm: ['s3cmd'] +- ceph: +- openssl_keys: +- rgw: + client.0: +- tox: [client.0] + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/verify/clusters/.qa b/qa/suites/rgw/verify/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/clusters/fixed-2.yaml b/qa/suites/rgw/verify/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/verify/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/verify/datacache/.qa b/qa/suites/rgw/verify/datacache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/datacache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/datacache/no_datacache.yaml b/qa/suites/rgw/verify/datacache/no_datacache.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/datacache/no_datacache.yaml diff --git a/qa/suites/rgw/verify/datacache/rgw-datacache.yaml b/qa/suites/rgw/verify/datacache/rgw-datacache.yaml new file mode 100644 index 000000000..f5f8c94d5 --- /dev/null +++ b/qa/suites/rgw/verify/datacache/rgw-datacache.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + client: + rgw d3n l1 local datacache enabled: true + rgw enable ops log: true + rgw d3n l1 datacache persistent path: /tmp/rgw_datacache/ + rgw d3n l1 datacache size: 10737418240 + rgw: + datacache: true + datacache_path: /tmp/rgw_datacache +tasks: +- workunit: + clients: + client.0: + - rgw/run-datacache.sh + env: + RGW_DATACACHE_PATH: /tmp/rgw_datacache diff --git a/qa/suites/rgw/verify/frontend b/qa/suites/rgw/verify/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/verify/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/verify/ignore-pg-availability.yaml b/qa/suites/rgw/verify/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/verify/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/verify/inline-data$/off.yaml b/qa/suites/rgw/verify/inline-data$/off.yaml new file mode 100644 index 000000000..56d1bce10 --- /dev/null +++ b/qa/suites/rgw/verify/inline-data$/off.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + inline data: false diff --git a/qa/suites/rgw/verify/inline-data$/on.yaml b/qa/suites/rgw/verify/inline-data$/on.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/inline-data$/on.yaml diff --git a/qa/suites/rgw/verify/msgr-failures/.qa b/qa/suites/rgw/verify/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/msgr-failures/few.yaml b/qa/suites/rgw/verify/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rgw/verify/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rgw/verify/objectstore b/qa/suites/rgw/verify/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/verify/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/verify/overrides.yaml b/qa/suites/rgw/verify/overrides.yaml new file mode 100644 index 000000000..aefee7b70 --- /dev/null +++ b/qa/suites/rgw/verify/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + compression type: random + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/verify/proto/.qa b/qa/suites/rgw/verify/proto/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/proto/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/proto/http.yaml b/qa/suites/rgw/verify/proto/http.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/proto/http.yaml diff --git a/qa/suites/rgw/verify/proto/https.yaml b/qa/suites/rgw/verify/proto/https.yaml new file mode 100644 index 000000000..e0742b5b0 --- /dev/null +++ b/qa/suites/rgw/verify/proto/https.yaml @@ -0,0 +1,14 @@ +overrides: + openssl_keys: + root: + client: client.0 + key-type: rsa:4096 + cn: teuthology + install: [client.0] + rgw.client.0: + client: client.0 + ca: root + embed-key: true + rgw: + client.0: + ssl certificate: rgw.client.0 diff --git a/qa/suites/rgw/verify/rgw_pool_type b/qa/suites/rgw/verify/rgw_pool_type new file mode 120000 index 000000000..3bbd28e96 --- /dev/null +++ b/qa/suites/rgw/verify/rgw_pool_type @@ -0,0 +1 @@ +.qa/rgw_pool_type
\ No newline at end of file diff --git a/qa/suites/rgw/verify/s3tests-branch.yaml b/qa/suites/rgw/verify/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/verify/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/verify/sharding$ b/qa/suites/rgw/verify/sharding$ new file mode 120000 index 000000000..148393cbf --- /dev/null +++ b/qa/suites/rgw/verify/sharding$ @@ -0,0 +1 @@ +.qa/rgw_bucket_sharding
\ No newline at end of file diff --git a/qa/suites/rgw/verify/striping$/.qa b/qa/suites/rgw/verify/striping$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/striping$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/striping$/stripe-equals-chunk.yaml b/qa/suites/rgw/verify/striping$/stripe-equals-chunk.yaml new file mode 100644 index 000000000..9b3e20a81 --- /dev/null +++ b/qa/suites/rgw/verify/striping$/stripe-equals-chunk.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + client: + # use default values where chunk-size=stripe-size + #rgw max chunk size: 4194304 + #rgw obj stripe size: 4194304 diff --git a/qa/suites/rgw/verify/striping$/stripe-greater-than-chunk.yaml b/qa/suites/rgw/verify/striping$/stripe-greater-than-chunk.yaml new file mode 100644 index 000000000..3bf40d6da --- /dev/null +++ b/qa/suites/rgw/verify/striping$/stripe-greater-than-chunk.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + client: + rgw max chunk size: 4194304 + # stripe size greater than (and not a multiple of) chunk size + rgw obj stripe size: 6291456 diff --git a/qa/suites/rgw/verify/supported-random-distro$ b/qa/suites/rgw/verify/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/verify/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
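The two striping$ fragments cover a stripe equal to the chunk and a stripe that is not a multiple of the chunk. A hypothetical third variant (not part of this change) exercising a stripe that is an exact multiple of the chunk size would read:

overrides:
  ceph:
    conf:
      client:
        rgw max chunk size: 4194304
        # stripe size an exact multiple (2x) of the chunk size
        rgw obj stripe size: 8388608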
\ No newline at end of file diff --git a/qa/suites/rgw/verify/tasks/+ b/qa/suites/rgw/verify/tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/tasks/+ diff --git a/qa/suites/rgw/verify/tasks/.qa b/qa/suites/rgw/verify/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/tasks/bucket-check.yaml b/qa/suites/rgw/verify/tasks/bucket-check.yaml new file mode 100644 index 000000000..4955d41c6 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/bucket-check.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/run-bucket-check.sh diff --git a/qa/suites/rgw/verify/tasks/cls.yaml b/qa/suites/rgw/verify/tasks/cls.yaml new file mode 100644 index 000000000..936c489bf --- /dev/null +++ b/qa/suites/rgw/verify/tasks/cls.yaml @@ -0,0 +1,16 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_lock.sh + - cls/test_cls_log.sh + - cls/test_cls_refcount.sh + - cls/test_cls_rgw.sh + - cls/test_cls_rgw_gc.sh + - cls/test_cls_rgw_stats.sh + - cls/test_cls_cmpomap.sh + - cls/test_cls_2pc_queue.sh + - rgw/test_rgw_gc_log.sh + - rgw/test_rgw_obj.sh + - rgw/test_rgw_throttle.sh + - rgw/test_librgw_file.sh diff --git a/qa/suites/rgw/verify/tasks/mp_reupload.yaml b/qa/suites/rgw/verify/tasks/mp_reupload.yaml new file mode 100644 index 000000000..d817a1c35 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/mp_reupload.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/test_rgw_s3_mp_reupload.sh diff --git a/qa/suites/rgw/verify/tasks/ragweed.yaml b/qa/suites/rgw/verify/tasks/ragweed.yaml new file mode 100644 index 000000000..6ac8f29a7 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/ragweed.yaml @@ -0,0 +1,6 @@ +tasks: +- ragweed: + client.0: + default-branch: ceph-reef + rgw_server: client.0 + stages: prepare,check diff --git a/qa/suites/rgw/verify/tasks/reshard.yaml b/qa/suites/rgw/verify/tasks/reshard.yaml new file mode 100644 index 000000000..db65af36a --- /dev/null +++ b/qa/suites/rgw/verify/tasks/reshard.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/run-reshard.sh diff --git a/qa/suites/rgw/verify/tasks/s3tests-java.yaml b/qa/suites/rgw/verify/tasks/s3tests-java.yaml new file mode 100644 index 000000000..722d78f8a --- /dev/null +++ b/qa/suites/rgw/verify/tasks/s3tests-java.yaml @@ -0,0 +1,6 @@ +tasks: +- s3tests-java: + client.0: + force-branch: ceph-reef + force-repo: https://github.com/ceph/java_s3tests.git + diff --git a/qa/suites/rgw/verify/tasks/s3tests.yaml b/qa/suites/rgw/verify/tasks/s3tests.yaml new file mode 100644 index 000000000..573cffbc3 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/s3tests.yaml @@ -0,0 +1,4 @@ +tasks: +- s3tests: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/verify/tasks/versioning.yaml b/qa/suites/rgw/verify/tasks/versioning.yaml new file mode 100644 index 000000000..ab928f270 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/versioning.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/run-versioning.sh diff --git a/qa/suites/rgw/verify/validater/.qa b/qa/suites/rgw/verify/validater/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/validater/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/validater/lockdep.yaml b/qa/suites/rgw/verify/validater/lockdep.yaml new file mode 100644 index 000000000..941fe12b1 --- /dev/null +++ b/qa/suites/rgw/verify/validater/lockdep.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + lockdep: true + mon: + lockdep: true diff --git a/qa/suites/rgw/verify/validater/valgrind.yaml b/qa/suites/rgw/verify/validater/valgrind.yaml new file mode 100644 index 000000000..898067e68 --- /dev/null +++ b/qa/suites/rgw/verify/validater/valgrind.yaml @@ -0,0 +1,21 @@ +overrides: + install: + ceph: + #debuginfo: true + rgw: + client.0: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + ceph: + conf: + global: + osd heartbeat grace: 40 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false +# valgrind: +# mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] +# osd: [--tool=memcheck] +# mds: [--tool=memcheck] +## https://tracker.ceph.com/issues/38621 +## mgr: [--tool=memcheck] diff --git a/qa/suites/rgw/website/% b/qa/suites/rgw/website/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/website/% diff --git a/qa/suites/rgw/website/.qa b/qa/suites/rgw/website/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/website/.qa @@ -0,0 +1 @@ +../.qa/
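The commented block at the end of validater/valgrind.yaml shows where daemon-level valgrind would go. As a sketch only, assuming the block sits under the ceph override as its indentation suggests, enabling it (with mgr still excluded per the linked tracker issue) would look like:

overrides:
  ceph:
    valgrind:
      mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
      osd: [--tool=memcheck]
      mds: [--tool=memcheck]
      # mgr omitted: https://tracker.ceph.com/issues/38621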
\ No newline at end of file diff --git a/qa/suites/rgw/website/clusters/.qa b/qa/suites/rgw/website/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/website/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/website/clusters/fixed-2.yaml b/qa/suites/rgw/website/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/website/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/website/frontend b/qa/suites/rgw/website/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/website/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/website/http.yaml b/qa/suites/rgw/website/http.yaml new file mode 100644 index 000000000..24cb6fc5d --- /dev/null +++ b/qa/suites/rgw/website/http.yaml @@ -0,0 +1 @@ +# https tests would need to generate wildcard certificates; only test http for now diff --git a/qa/suites/rgw/website/ignore-pg-availability.yaml b/qa/suites/rgw/website/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/website/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/website/overrides.yaml b/qa/suites/rgw/website/overrides.yaml new file mode 100644 index 000000000..80397571e --- /dev/null +++ b/qa/suites/rgw/website/overrides.yaml @@ -0,0 +1,26 @@ +overrides: + install: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw enable static website: True + client.0: + rgw lc debug interval: 10 + client.1: + rgw enable apis: s3website + rgw: + client.0: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + client.1: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + s3tests: + calling-format: subdomain diff --git a/qa/suites/rgw/website/s3tests-branch.yaml b/qa/suites/rgw/website/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/website/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/website/tasks/.qa b/qa/suites/rgw/website/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/website/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/website/tasks/s3tests-website.yaml b/qa/suites/rgw/website/tasks/s3tests-website.yaml new file mode 100644 index 000000000..da10a6f1c --- /dev/null +++ b/qa/suites/rgw/website/tasks/s3tests-website.yaml @@ -0,0 +1,17 @@ +tasks: +- install: +- ceph: +- dnsmasq: + client.0: + s3.: client.0 + s3-website.: client.1 +- rgw: + client.0: + dns-name: s3. + client.1: + dns-s3website-name: s3-website. +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + rgw_website_server: client.1 diff --git a/qa/suites/rgw/website/ubuntu_latest.yaml b/qa/suites/rgw/website/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/website/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/samba/% b/qa/suites/samba/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/samba/% diff --git a/qa/suites/samba/.qa b/qa/suites/samba/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/clusters/.qa b/qa/suites/samba/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/clusters/samba-basic.yaml b/qa/suites/samba/clusters/samba-basic.yaml new file mode 100644 index 000000000..af432f610 --- /dev/null +++ b/qa/suites/samba/clusters/samba-basic.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, osd.1] +- [samba.0, client.0, client.1] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB diff --git a/qa/suites/samba/install/.qa b/qa/suites/samba/install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/install/install.yaml b/qa/suites/samba/install/install.yaml new file mode 100644 index 000000000..c53f9c55b --- /dev/null +++ b/qa/suites/samba/install/install.yaml @@ -0,0 +1,9 @@ +# we currently can't install Samba on RHEL; need a gitbuilder and code updates +os_type: ubuntu + +tasks: +- install: +- install: + project: samba + extra_packages: ['samba'] +- ceph: diff --git a/qa/suites/samba/mount/.qa b/qa/suites/samba/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/mount/fuse.yaml b/qa/suites/samba/mount/fuse.yaml new file mode 100644 index 000000000..d00ffdb48 --- /dev/null +++ b/qa/suites/samba/mount/fuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" + diff --git a/qa/suites/samba/mount/kclient.yaml b/qa/suites/samba/mount/kclient.yaml new file mode 100644 index 000000000..8baa09f8b --- /dev/null +++ b/qa/suites/samba/mount/kclient.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +kernel: + client: + branch: testing +tasks: +- kclient: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" + diff --git a/qa/suites/samba/mount/native.yaml b/qa/suites/samba/mount/native.yaml new file mode 100644 index 000000000..09b8c1c4e --- /dev/null +++ b/qa/suites/samba/mount/native.yaml @@ -0,0 +1,2 @@ +tasks: +- samba: diff --git a/qa/suites/samba/mount/noceph.yaml b/qa/suites/samba/mount/noceph.yaml new file mode 100644 index 000000000..3cad4740d --- /dev/null +++ b/qa/suites/samba/mount/noceph.yaml @@ -0,0 +1,5 @@ +tasks: +- localdir: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" diff --git a/qa/suites/samba/objectstore b/qa/suites/samba/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/samba/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/samba/workload/.qa b/qa/suites/samba/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/workload/cifs-dbench.yaml b/qa/suites/samba/workload/cifs-dbench.yaml new file mode 100644 index 000000000..c13c1c099 --- /dev/null +++ b/qa/suites/samba/workload/cifs-dbench.yaml @@ -0,0 +1,8 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/samba/workload/cifs-fsstress.yaml b/qa/suites/samba/workload/cifs-fsstress.yaml new file mode 100644 index 000000000..ff003af34 --- /dev/null +++ b/qa/suites/samba/workload/cifs-fsstress.yaml @@ -0,0 +1,8 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - suites/fsstress.sh diff --git a/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled b/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled new file mode 100644 index 000000000..ab9ff8ac7 --- /dev/null +++ b/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled @@ -0,0 +1,9 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - kernel_untar_build.sh + diff --git a/qa/suites/samba/workload/smbtorture.yaml b/qa/suites/samba/workload/smbtorture.yaml new file mode 100644 index 000000000..823489a20 --- /dev/null +++ b/qa/suites/samba/workload/smbtorture.yaml @@ -0,0 +1,39 @@ +tasks: +- pexec: + client.1: + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.lock + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.fdpass + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.unlink + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.attr + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.trans2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.negnowait + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.dir1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny3 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.denydos + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcon + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcondev + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.vuid + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rw1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.open + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.defer_open + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.xcopy + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rename + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.properties + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.mangle + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.openattr + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.chkpath + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.secleak + - /usr/local/samba/bin/smbtorture --password=ubuntu 
//localhost/ceph base.disconnect + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.samba3error + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.smb +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdcon +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdopen + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-readwrite + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-torture + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-pipe_number + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-ioctl +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-maxfid diff --git a/qa/suites/smoke/.qa b/qa/suites/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/% b/qa/suites/smoke/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/smoke/basic/% diff --git a/qa/suites/smoke/basic/.qa b/qa/suites/smoke/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/clusters/+ b/qa/suites/smoke/basic/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/smoke/basic/clusters/+ diff --git a/qa/suites/smoke/basic/clusters/.qa b/qa/suites/smoke/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml b/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml new file mode 120000 index 000000000..24480dfc7 --- /dev/null +++ b/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3-cephfs.yaml
\ No newline at end of file diff --git a/qa/suites/smoke/basic/clusters/openstack.yaml b/qa/suites/smoke/basic/clusters/openstack.yaml new file mode 100644 index 000000000..7d652b491 --- /dev/null +++ b/qa/suites/smoke/basic/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/smoke/basic/objectstore/.qa b/qa/suites/smoke/basic/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml b/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..66cf2bc75 --- /dev/null +++ b/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/smoke/basic/s3tests-branch.yaml b/qa/suites/smoke/basic/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/smoke/basic/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/smoke/basic/supported-all-distro b/qa/suites/smoke/basic/supported-all-distro new file mode 120000 index 000000000..ca82dde58 --- /dev/null +++ b/qa/suites/smoke/basic/supported-all-distro @@ -0,0 +1 @@ +.qa/distros/supported-all-distro
\ No newline at end of file diff --git a/qa/suites/smoke/basic/tasks/% b/qa/suites/smoke/basic/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/smoke/basic/tasks/% diff --git a/qa/suites/smoke/basic/tasks/.qa b/qa/suites/smoke/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/tasks/0-install.yaml b/qa/suites/smoke/basic/tasks/0-install.yaml new file mode 100644 index 000000000..ceffc50d8 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/0-install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: + cleanup: true diff --git a/qa/suites/smoke/basic/tasks/test/.qa b/qa/suites/smoke/basic/tasks/test/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_blogbench.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_blogbench.yaml new file mode 100644 index 000000000..bc40416ff --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1,10 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_fsstress.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..e21286d59 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_iozone.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_iozone.yaml new file mode 100644 index 000000000..871606ab8 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_iozone.yaml @@ -0,0 +1,9 @@ +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: [client.0] +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_pjd.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..0f4469c93 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,18 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + conf: + mds: + debug mds: 20 + debug ms: 1 + client: + debug client: 20 + debug ms: 1 + fuse set user groups: true +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_direct_io.yaml b/qa/suites/smoke/basic/tasks/test/kclient_workunit_direct_io.yaml new file mode 100644 index 000000000..3720d418c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_direct_io.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - direct_io diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_dbench.yaml b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_dbench.yaml new file mode 100644 index 000000000..256d1f1fe --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_dbench.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_fsstress.yaml b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..649ea8e14 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_fsstress.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_pjd.yaml 
b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_pjd.yaml new file mode 100644 index 000000000..7dea45b80 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_pjd.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/smoke/basic/tasks/test/libcephfs_interface_tests.yaml b/qa/suites/smoke/basic/tasks/test/libcephfs_interface_tests.yaml new file mode 100644 index 000000000..3be975b6b --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/libcephfs_interface_tests.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/smoke/basic/tasks/test/mon_thrash.yaml b/qa/suites/smoke/basic/tasks/test/mon_thrash.yaml new file mode 100644 index 000000000..9aa6a5f1c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/mon_thrash.yaml @@ -0,0 +1,39 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - mons down + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME + - slow request + conf: + global: + ms inject delay max: 1 + ms inject delay probability: 0.005 + ms inject delay type: mon + ms inject internal delays: 0.002 + ms inject socket failures: 2500 + mon client directed command retry: 5 + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- ceph: + fs: xfs +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + thrash_many: true +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/smoke/basic/tasks/test/rados_api_tests.yaml b/qa/suites/smoke/basic/tasks/test/rados_api_tests.yaml new file mode 100644 index 000000000..d81428aba --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_api_tests.yaml @@ -0,0 +1,32 @@ +tasks: +- ceph: + fs: ext4 + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - reached quota + - but it is still running + - slow request + conf: + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" +- thrashosds: + chance_pgnum_grow: 2 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/smoke/basic/tasks/test/rados_bench.yaml b/qa/suites/smoke/basic/tasks/test/rados_bench.yaml new file mode 100644 index 000000000..ae8862e1c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_bench.yaml @@ -0,0 +1,47 @@ +overrides: + ceph: + conf: + global: + ms inject delay max: 1 + ms inject delay probability: 0.005 + ms inject delay type: osd + ms inject internal delays: 0.002 + ms inject socket failures: 2500 + mon client directed command retry: 5 +tasks: +- ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME + - slow request +- thrashosds: + chance_pgnum_grow: 2 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 
+ timeout: 1200 +- full_sequential: + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 diff --git a/qa/suites/smoke/basic/tasks/test/rados_cache_snaps.yaml b/qa/suites/smoke/basic/tasks/test/rados_cache_snaps.yaml new file mode 100644 index 000000000..7178f6824 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_cache_snaps.yaml @@ -0,0 +1,50 @@ +tasks: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - slow request +- thrashosds: + chance_pgnum_grow: 2 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 +- rados: + clients: + - client.0 + objects: 500 + op_weights: + copy_from: 50 + delete: 50 + cache_evict: 50 + cache_flush: 50 + read: 100 + rollback: 50 + snap_create: 50 + snap_remove: 50 + cache_try_flush: 50 + write: 100 + ops: 4000 + pool_snaps: true + pools: + - base diff --git a/qa/suites/smoke/basic/tasks/test/rados_cls_all.yaml b/qa/suites/smoke/basic/tasks/test/rados_cls_all.yaml new file mode 100644 index 000000000..c4d55728c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_cls_all.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + osd: + osd_class_load_list: "*" + osd_class_default_list: "*" +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/smoke/basic/tasks/test/rados_ec_snaps.yaml b/qa/suites/smoke/basic/tasks/test/rados_ec_snaps.yaml new file mode 100644 index 000000000..5ee4a7ad9 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_ec_snaps.yaml @@ -0,0 +1,40 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - slow request +- thrashosds: + chance_pgnum_grow: 3 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- rados: + clients: + - client.0 + ec_pool: true + max_in_flight: 64 + max_seconds: 600 + objects: 1024 + op_weights: + append: 100 + copy_from: 50 + delete: 50 + read: 100 + rmattr: 25 + rollback: 50 + setattr: 25 + snap_create: 50 + snap_remove: 50 + write: 0 + ops: 400000 + size: 16384 diff --git a/qa/suites/smoke/basic/tasks/test/rados_python.yaml b/qa/suites/smoke/basic/tasks/test/rados_python.yaml new file mode 100644 index 000000000..630aa567f --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_python.yaml @@ -0,0 +1,21 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + timeout: 1h + clients: + client.0: + - 
rados/test_python.sh diff --git a/qa/suites/smoke/basic/tasks/test/rados_workunit_loadgen_mix.yaml b/qa/suites/smoke/basic/tasks/test/rados_workunit_loadgen_mix.yaml new file mode 100644 index 000000000..455d6ae87 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_workunit_loadgen_mix.yaml @@ -0,0 +1,12 @@ +tasks: +- ceph: + fs: ext4 + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + all: + - rados/load-gen-mix.sh diff --git a/qa/suites/smoke/basic/tasks/test/rbd_api_tests.yaml b/qa/suites/smoke/basic/tasks/test/rbd_api_tests.yaml new file mode 100644 index 000000000..cbd0fb27c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_api_tests.yaml @@ -0,0 +1,18 @@ +tasks: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - is full \(reached quota + fs: xfs +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/smoke/basic/tasks/test/rbd_cli_import_export.yaml b/qa/suites/smoke/basic/tasks/test/rbd_cli_import_export.yaml new file mode 100644 index 000000000..79ff9418d --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_cli_import_export.yaml @@ -0,0 +1,12 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/smoke/basic/tasks/test/rbd_fsx.yaml b/qa/suites/smoke/basic/tasks/test/rbd_fsx.yaml new file mode 100644 index 000000000..92bdea280 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_fsx.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME + - slow request + conf: + client: + rbd cache: true + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 +tasks: +- ceph: + fs: xfs +- thrashosds: + timeout: 1200 +- rbd_fsx: + clients: + - client.0 + ops: 2000 diff --git a/qa/suites/smoke/basic/tasks/test/rbd_python_api_tests.yaml b/qa/suites/smoke/basic/tasks/test/rbd_python_api_tests.yaml new file mode 100644 index 000000000..73e64bb2c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_python_api_tests.yaml @@ -0,0 +1,16 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/smoke/basic/tasks/test/rbd_workunit_suites_iozone.yaml b/qa/suites/smoke/basic/tasks/test/rbd_workunit_suites_iozone.yaml new file mode 100644 index 000000000..8602447aa --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_workunit_suites_iozone.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false + client: + rbd default features: 5 +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/smoke/basic/tasks/test/rgw_ec_s3tests.yaml b/qa/suites/smoke/basic/tasks/test/rgw_ec_s3tests.yaml new file mode 100644 index 000000000..3214fd900 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rgw_ec_s3tests.yaml @@ -0,0 +1,21 @@ 
+overrides: + rgw: + ec-data-pool: true + cache-pools: true +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false diff --git a/qa/suites/smoke/basic/tasks/test/rgw_s3tests.yaml b/qa/suites/smoke/basic/tasks/test/rgw_s3tests.yaml new file mode 100644 index 000000000..337452f75 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rgw_s3tests.yaml @@ -0,0 +1,18 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false diff --git a/qa/suites/stress/.qa b/qa/suites/stress/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/% b/qa/suites/stress/bench/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/stress/bench/% diff --git a/qa/suites/stress/bench/.qa b/qa/suites/stress/bench/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/bench/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/clusters/.qa b/qa/suites/stress/bench/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/bench/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml b/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml new file mode 120000 index 000000000..24480dfc7 --- /dev/null +++ b/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3-cephfs.yaml
\ No newline at end of file diff --git a/qa/suites/stress/bench/tasks/.qa b/qa/suites/stress/bench/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/bench/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml b/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml new file mode 100644 index 000000000..eafec39e3 --- /dev/null +++ b/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + all: + - snaps diff --git a/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml b/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml new file mode 100644 index 000000000..bfbb8d3db --- /dev/null +++ b/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml @@ -0,0 +1,19 @@ +tasks: +- install: + extra_system_packages: + deb: + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- kclient: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/stress/thrash/% b/qa/suites/stress/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/stress/thrash/% diff --git a/qa/suites/stress/thrash/.qa b/qa/suites/stress/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/clusters/.qa b/qa/suites/stress/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/clusters/16-osd.yaml b/qa/suites/stress/thrash/clusters/16-osd.yaml new file mode 100644 index 000000000..76232339b --- /dev/null +++ b/qa/suites/stress/thrash/clusters/16-osd.yaml @@ -0,0 +1,18 @@ +roles: +- [mon.a, mds.a, osd.0] +- [mon.b, mgr.x, osd.1] +- [mon.c, mgr.y, osd.2] +- [osd.3] +- [osd.4] +- [osd.5] +- [osd.6] +- [osd.7] +- [osd.8] +- [osd.9] +- [osd.10] +- [osd.11] +- [osd.12] +- [osd.13] +- [osd.14] +- [osd.15] +- [client.0] diff --git a/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml b/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml new file mode 100644 index 000000000..8c3556ae9 --- /dev/null +++ b/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mon.c, client.0] diff --git a/qa/suites/stress/thrash/clusters/8-osd.yaml b/qa/suites/stress/thrash/clusters/8-osd.yaml new file mode 100644 index 000000000..9f51c6bad --- /dev/null +++ b/qa/suites/stress/thrash/clusters/8-osd.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mds.a, osd.0] +- [mon.b, mgr.x, osd.1] +- [mon.c, osd.2] +- [osd.3] +- [osd.4] +- [osd.5] +- [osd.6] +- [osd.7] +- [client.0] diff --git a/qa/suites/stress/thrash/thrashers/.qa b/qa/suites/stress/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/thrashers/default.yaml b/qa/suites/stress/thrash/thrashers/default.yaml new file mode 100644 index 000000000..47fa40480 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/default.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +- thrashosds: diff --git a/qa/suites/stress/thrash/thrashers/fast.yaml b/qa/suites/stress/thrash/thrashers/fast.yaml new file mode 100644 index 000000000..b2466dbe8 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/fast.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +- thrashosds: + op_delay: 1 + chance_down: 10 diff --git a/qa/suites/stress/thrash/thrashers/more-down.yaml b/qa/suites/stress/thrash/thrashers/more-down.yaml new file mode 100644 index 000000000..8ba738d1f --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/more-down.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +- thrashosds: + chance_down: 50 diff --git a/qa/suites/stress/thrash/workloads/.qa b/qa/suites/stress/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml b/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml new file mode 100644 index 000000000..912f12d6c --- /dev/null +++ b/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/bonnie.sh diff --git a/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml b/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml new file mode 100644 index 000000000..18a6051be --- /dev/null +++ b/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/stress/thrash/workloads/radosbench.yaml b/qa/suites/stress/thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..3940870fc --- /dev/null +++ b/qa/suites/stress/thrash/workloads/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 diff --git a/qa/suites/stress/thrash/workloads/readwrite.yaml b/qa/suites/stress/thrash/workloads/readwrite.yaml new file mode 100644 index 000000000..c53e52b08 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/teuthology/.qa b/qa/suites/teuthology/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/% b/qa/suites/teuthology/buildpackages/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/buildpackages/% diff --git a/qa/suites/teuthology/buildpackages/.qa b/qa/suites/teuthology/buildpackages/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/supported-all-distro b/qa/suites/teuthology/buildpackages/supported-all-distro new file mode 120000 index 000000000..ca82dde58 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/supported-all-distro @@ -0,0 +1 @@ +.qa/distros/supported-all-distro
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/tasks/.qa b/qa/suites/teuthology/buildpackages/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/tasks/default.yaml b/qa/suites/teuthology/buildpackages/tasks/default.yaml new file mode 100644 index 000000000..cb583c763 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/default.yaml @@ -0,0 +1,14 @@ +roles: + - [client.0] +tasks: + - install: + tag: v0.94.1 + - exec: + client.0: + - ceph --version | grep 'version 0.94.1' + - install.upgrade: + client.0: + tag: v0.94.3 + - exec: + client.0: + - ceph --version | grep 'version 0.94.3' diff --git a/qa/suites/teuthology/buildpackages/tasks/tag.yaml b/qa/suites/teuthology/buildpackages/tasks/tag.yaml new file mode 100644 index 000000000..2bfb8a995 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/tag.yaml @@ -0,0 +1,11 @@ +roles: + - [mon.a, mgr.x, client.0] +tasks: + - install: + # tag has precedence over branch and sha1 + tag: v0.94.1 + branch: firefly + sha1: e5b6eea91cc37434f78a987d2dd1d3edd4a23f3f # dumpling + - exec: + client.0: + - ceph --version | grep 'version 0.94.1' diff --git a/qa/suites/teuthology/ceph/% b/qa/suites/teuthology/ceph/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/ceph/% diff --git a/qa/suites/teuthology/ceph/.qa b/qa/suites/teuthology/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/clusters/.qa b/qa/suites/teuthology/ceph/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/ceph/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/clusters/single.yaml b/qa/suites/teuthology/ceph/clusters/single.yaml new file mode 100644 index 000000000..0c6a40d0b --- /dev/null +++ b/qa/suites/teuthology/ceph/clusters/single.yaml @@ -0,0 +1,2 @@ +roles: + - [mon.a, mgr.x, client.0] diff --git a/qa/suites/teuthology/ceph/distros b/qa/suites/teuthology/ceph/distros new file mode 120000 index 000000000..23d9e9be8 --- /dev/null +++ b/qa/suites/teuthology/ceph/distros @@ -0,0 +1 @@ +.qa/distros/supported
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/tasks/.qa b/qa/suites/teuthology/ceph/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/ceph/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/tasks/teuthology.yaml b/qa/suites/teuthology/ceph/tasks/teuthology.yaml new file mode 100644 index 000000000..00081c8aa --- /dev/null +++ b/qa/suites/teuthology/ceph/tasks/teuthology.yaml @@ -0,0 +1,3 @@ +tasks: + - install: + - tests: diff --git a/qa/suites/teuthology/integration.yaml b/qa/suites/teuthology/integration.yaml new file mode 100644 index 000000000..8a7f1c776 --- /dev/null +++ b/qa/suites/teuthology/integration.yaml @@ -0,0 +1,2 @@ +tasks: +- teuthology_integration: diff --git a/qa/suites/teuthology/multi-cluster/% b/qa/suites/teuthology/multi-cluster/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/% diff --git a/qa/suites/teuthology/multi-cluster/.qa b/qa/suites/teuthology/multi-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/multi-cluster/all/.qa b/qa/suites/teuthology/multi-cluster/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/multi-cluster/all/ceph.yaml b/qa/suites/teuthology/multi-cluster/all/ceph.yaml new file mode 100644 index 000000000..4659ef3d1 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/ceph.yaml @@ -0,0 +1,25 @@ +roles: +- - ceph.mon.a + - ceph.mon.b + - ceph.mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 + - backup.client.0 +- - backup.mon.a + - backup.mgr.x + - ceph.osd.0 + - ceph.osd.1 + - ceph.osd.2 + - ceph.client.0 + - client.1 + - osd.3 +tasks: +- install: +- ceph: + cluster: backup +- ceph: +- workunit: + clients: + ceph.client.0: [true.sh] + backup.client.0: [true.sh] diff --git a/qa/suites/teuthology/multi-cluster/all/thrashosds.yaml b/qa/suites/teuthology/multi-cluster/all/thrashosds.yaml new file mode 100644 index 000000000..52002f57f --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/thrashosds.yaml @@ -0,0 +1,21 @@ +roles: +- - backup.mon.a + - backup.mon.b + - backup.mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 +- - backup.mon.c + - backup.osd.3 + - backup.osd.4 + - backup.osd.5 + - backup.client.0 +tasks: +- install: +- ceph: + cluster: backup +- thrashosds: + cluster: backup +- workunit: + clients: + all: [true.sh] diff --git a/qa/suites/teuthology/multi-cluster/all/upgrade.yaml b/qa/suites/teuthology/multi-cluster/all/upgrade.yaml new file mode 100644 index 000000000..0973fc390 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/upgrade.yaml @@ -0,0 +1,51 @@ +overrides: + ceph: + log-ignorelist: + - failed to encode map + conf: + mon: + mon warn on legacy crush tunables: false +roles: +- - ceph.mon.a + - ceph.mon.b + - ceph.mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 + - backup.client.0 +- - backup.mon.a + - backup.mgr.x + - ceph.osd.0 + - ceph.osd.1 + - ceph.osd.2 + - ceph.client.0 + - client.1 + - osd.3 +tasks: +- install: + branch: infernalis +- ceph: + cluster: backup +- ceph: +- workunit: + clients: + backup.client.0: [true.sh] + ceph.client.0: [true.sh] +- install.upgrade: + ceph.mon.a: + branch: jewel + backup.mon.a: + branch: jewel +- ceph.restart: [ceph.mon.a, ceph.mon.b, ceph.osd.0, ceph.osd.1, ceph.osd.2, osd.3] +- exec: + ceph.client.0: + - ceph --version | grep -F 'version 10.' + client.1: + - ceph --cluster backup --version | grep -F 'version 10.' + backup.client.0: + # cli upgraded + - ceph --cluster backup --id 0 --version | grep -F 'version 10.' + - ceph --version | grep -F 'version 10.' + # backup cluster mon not upgraded + - ceph --cluster backup --id 0 tell mon.a version | grep -F 'version 9.2.' + - ceph tell mon.a version | grep -F 'version 10.' 
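The exec block above pins down the mixed-version state by grepping plain "ceph --version" and "ceph tell mon.a version" output on each client. The cephadm-based upgrade suites later in this series (pacific-x, quincy-x) make the same kind of assertion against the structured output of "ceph versions" instead; a minimal sketch of that style of check, reusing the commands from those fragments and assuming the sha1 variable that their cephadm.shell tasks export:

    # wait for the orchestrated upgrade to finish
    while ceph orch upgrade status | jq '.in_progress' | grep true ; do
      ceph orch ps ; ceph versions ; sleep 30
    done
    # assert every daemon now reports exactly one version, and that it is the target sha1
    ceph versions | jq -e '.overall | length == 1'
    ceph versions | jq -e '.overall | keys' | grep $sha1
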
diff --git a/qa/suites/teuthology/multi-cluster/all/workunit.yaml b/qa/suites/teuthology/multi-cluster/all/workunit.yaml new file mode 100644 index 000000000..b1288e38e --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/workunit.yaml @@ -0,0 +1,23 @@ +roles: +- - backup.mon.a + - backup.mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 + - backup.client.0 +- - mon.a + - mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 + - client.1 + - backup.client.1 +tasks: +- install: +- workunit: + clients: + all: [true.sh] +- workunit: + clients: + backup.client.1: [true.sh] diff --git a/qa/suites/teuthology/no-ceph/% b/qa/suites/teuthology/no-ceph/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/no-ceph/% diff --git a/qa/suites/teuthology/no-ceph/.qa b/qa/suites/teuthology/no-ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/no-ceph/clusters/.qa b/qa/suites/teuthology/no-ceph/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/no-ceph/clusters/single.yaml b/qa/suites/teuthology/no-ceph/clusters/single.yaml new file mode 100644 index 000000000..0c6a40d0b --- /dev/null +++ b/qa/suites/teuthology/no-ceph/clusters/single.yaml @@ -0,0 +1,2 @@ +roles: + - [mon.a, mgr.x, client.0] diff --git a/qa/suites/teuthology/no-ceph/tasks/.qa b/qa/suites/teuthology/no-ceph/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml b/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml new file mode 100644 index 000000000..1391458b5 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml @@ -0,0 +1,2 @@ +tasks: + - tests: diff --git a/qa/suites/teuthology/nop/% b/qa/suites/teuthology/nop/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/nop/% diff --git a/qa/suites/teuthology/nop/.qa b/qa/suites/teuthology/nop/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/nop/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/nop/all/.qa b/qa/suites/teuthology/nop/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/nop/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/nop/all/nop.yaml b/qa/suites/teuthology/nop/all/nop.yaml new file mode 100644 index 000000000..4a5b227e6 --- /dev/null +++ b/qa/suites/teuthology/nop/all/nop.yaml @@ -0,0 +1,3 @@ +tasks: + - nop: + diff --git a/qa/suites/teuthology/rgw/% b/qa/suites/teuthology/rgw/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/rgw/% diff --git a/qa/suites/teuthology/rgw/.qa b/qa/suites/teuthology/rgw/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/rgw/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/rgw/distros b/qa/suites/teuthology/rgw/distros new file mode 120000 index 000000000..23d9e9be8 --- /dev/null +++ b/qa/suites/teuthology/rgw/distros @@ -0,0 +1 @@ +.qa/distros/supported
\ No newline at end of file diff --git a/qa/suites/teuthology/rgw/tasks/.qa b/qa/suites/teuthology/rgw/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/rgw/tasks/s3tests-fastcgi.yaml b/qa/suites/teuthology/rgw/tasks/s3tests-fastcgi.yaml new file mode 100644 index 000000000..d76121fad --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/s3tests-fastcgi.yaml @@ -0,0 +1,24 @@ +# this runs s3tests against rgw, using mod_fastcgi +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.1] + +tasks: +- install: + branch: master +- ceph: +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + force-branch: ceph-master +overrides: + ceph: + fs: xfs + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + rgw: + ec-data-pool: false + frontend: apache diff --git a/qa/suites/teuthology/rgw/tasks/s3tests-fcgi.yaml b/qa/suites/teuthology/rgw/tasks/s3tests-fcgi.yaml new file mode 100644 index 000000000..8228501ca --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/s3tests-fcgi.yaml @@ -0,0 +1,26 @@ +# this runs s3tests against rgw, using mod_proxy_fcgi +# the choice between uds or tcp with mod_proxy_fcgi depends on the distro +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.1] + +tasks: +- install: + branch: master +- ceph: +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + force-branch: ceph-master +overrides: + ceph: + fs: xfs + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + rgw: + ec-data-pool: false + frontend: apache + use_fcgi: true diff --git a/qa/suites/teuthology/rgw/tasks/s3tests.yaml b/qa/suites/teuthology/rgw/tasks/s3tests.yaml new file mode 100644 index 000000000..ee8e9d5b5 --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/s3tests.yaml @@ -0,0 +1,23 @@ +# this runs s3tests against rgw +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.1] + +tasks: +- install: + branch: master +- ceph: +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + force-branch: ceph-master +overrides: + ceph: + fs: xfs + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + rgw: + ec-data-pool: false diff --git a/qa/suites/teuthology/workunits/.qa b/qa/suites/teuthology/workunits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/workunits/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/workunits/yes.yaml b/qa/suites/teuthology/workunits/yes.yaml new file mode 100644 index 000000000..45098dbb8 --- /dev/null +++ b/qa/suites/teuthology/workunits/yes.yaml @@ -0,0 +1,8 @@ +roles: + - [client.0] +tasks: +- install: +- workunit: + clients: + all: + - true.sh diff --git a/qa/suites/tgt/.qa b/qa/suites/tgt/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/% b/qa/suites/tgt/basic/% new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/qa/suites/tgt/basic/% @@ -0,0 +1 @@ + diff --git a/qa/suites/tgt/basic/.qa b/qa/suites/tgt/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/clusters/.qa b/qa/suites/tgt/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/clusters/fixed-3.yaml b/qa/suites/tgt/basic/clusters/fixed-3.yaml new file mode 100644 index 000000000..5e23c9e4f --- /dev/null +++ b/qa/suites/tgt/basic/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/tgt/basic/msgr-failures/.qa b/qa/suites/tgt/basic/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/msgr-failures/few.yaml b/qa/suites/tgt/basic/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/tgt/basic/msgr-failures/many.yaml b/qa/suites/tgt/basic/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/tgt/basic/tasks/.qa b/qa/suites/tgt/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/tasks/blogbench.yaml b/qa/suites/tgt/basic/tasks/blogbench.yaml new file mode 100644 index 000000000..f77a78b6b --- /dev/null +++ b/qa/suites/tgt/basic/tasks/blogbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/tgt/basic/tasks/bonnie.yaml b/qa/suites/tgt/basic/tasks/bonnie.yaml new file mode 100644 index 000000000..2cbfcf887 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/bonnie.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/bonnie.sh diff --git a/qa/suites/tgt/basic/tasks/dbench-short.yaml b/qa/suites/tgt/basic/tasks/dbench-short.yaml new file mode 100644 index 000000000..fcb721a4d --- /dev/null +++ b/qa/suites/tgt/basic/tasks/dbench-short.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/dbench-short.sh diff --git a/qa/suites/tgt/basic/tasks/dbench.yaml b/qa/suites/tgt/basic/tasks/dbench.yaml new file mode 100644 index 000000000..7f732175f --- /dev/null +++ b/qa/suites/tgt/basic/tasks/dbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/tgt/basic/tasks/ffsb.yaml b/qa/suites/tgt/basic/tasks/ffsb.yaml new file mode 100644 index 000000000..f50a3a196 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/ffsb.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/tgt/basic/tasks/fio.yaml b/qa/suites/tgt/basic/tasks/fio.yaml new file mode 100644 index 000000000..e7346ce52 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fio.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fio.sh diff --git a/qa/suites/tgt/basic/tasks/fsstress.yaml b/qa/suites/tgt/basic/tasks/fsstress.yaml new file mode 100644 index 000000000..c77f511c0 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/tgt/basic/tasks/fsx.yaml b/qa/suites/tgt/basic/tasks/fsx.yaml new file mode 100644 index 000000000..40f7e817a --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsx.yaml @@ -0,0 +1,20 @@ +tasks: +- install: + extra_system_packages: + deb: + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/tgt/basic/tasks/fsync-tester.yaml b/qa/suites/tgt/basic/tasks/fsync-tester.yaml new file mode 100644 index 000000000..ea627b7d1 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsync-tester.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/tgt/basic/tasks/iogen.yaml b/qa/suites/tgt/basic/tasks/iogen.yaml new file mode 100644 index 000000000..1065c74da --- /dev/null +++ b/qa/suites/tgt/basic/tasks/iogen.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iogen.sh diff --git a/qa/suites/tgt/basic/tasks/iozone-sync.yaml b/qa/suites/tgt/basic/tasks/iozone-sync.yaml new file mode 100644 index 000000000..ac241a417 --- /dev/null +++ 
b/qa/suites/tgt/basic/tasks/iozone-sync.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iozone-sync.sh diff --git a/qa/suites/tgt/basic/tasks/iozone.yaml b/qa/suites/tgt/basic/tasks/iozone.yaml new file mode 100644 index 000000000..cf5604c21 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/iozone.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/tgt/basic/tasks/pjd.yaml b/qa/suites/tgt/basic/tasks/pjd.yaml new file mode 100644 index 000000000..ba5c631f1 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/pjd.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/upgrade/.qa b/qa/suites/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/cephfs b/qa/suites/upgrade/cephfs new file mode 120000 index 000000000..1ff68fa8b --- /dev/null +++ b/qa/suites/upgrade/cephfs @@ -0,0 +1 @@ +.qa/suites/fs/upgrade/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/.qa b/qa/suites/upgrade/pacific-x/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/% b/qa/suites/upgrade/pacific-x/parallel/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/% diff --git a/qa/suites/upgrade/pacific-x/parallel/.qa b/qa/suites/upgrade/pacific-x/parallel/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/0-random-distro$ b/qa/suites/upgrade/pacific-x/parallel/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/0-start.yaml b/qa/suites/upgrade/pacific-x/parallel/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/pacific-x/parallel/1-tasks.yaml b/qa/suites/upgrade/pacific-x/parallel/1-tasks.yaml new file mode 100644 index 000000000..064d0758d --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/1-tasks.yaml @@ -0,0 +1,43 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing pacific cephadm ..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:pacific + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing pacific cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + +- print: "**** done start telemetry pacific..." +- workunit: + clients: + client.0: + - test_telemetry_pacific.sh +- print: "**** done end telemetry pacific..." + +- print: "**** done start parallel" +- parallel: + - workload + - upgrade-sequence +- print: "**** done end parallel" + +- print: "**** done start telemetry x..." +- workunit: + clients: + client.0: + - test_telemetry_pacific_x.sh +- print: "**** done end telemetry x..." diff --git a/qa/suites/upgrade/pacific-x/parallel/mon_election b/qa/suites/upgrade/pacific-x/parallel/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/upgrade-sequence.yaml b/qa/suites/upgrade/pacific-x/parallel/upgrade-sequence.yaml new file mode 100644 index 000000000..a3f0888da --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/upgrade-sequence.yaml @@ -0,0 +1,16 @@ +# renamed tasks: to upgrade-sequence: +upgrade-sequence: + sequential: + - print: "**** done start upgrade, wait" + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade, wait..." + diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/+ b/qa/suites/upgrade/pacific-x/parallel/workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/+ diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/.qa b/qa/suites/upgrade/pacific-x/parallel/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/ec-rados-default.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/ec-rados-default.yaml new file mode 100644 index 000000000..67a0f39c5 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/ec-rados-default.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run run randomized correctness test for rados operations + on an erasure-coded pool +workload: + full_sequential: + - print: "**** done start ec-rados-default.yaml" + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done end ec-rados-default.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/rados_api.yaml new file mode 100644 index 000000000..1380a4016 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/rados_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + object class functional tests +workload: + full_sequential: + - print: "**** done start rados_api.yaml" + - workunit: + branch: pacific + clients: + client.0: + - cls + - print: "**** done end rados_api.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/rados_loadgenbig.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/rados_loadgenbig.yaml new file mode 100644 index 000000000..f315b1579 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/rados_loadgenbig.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + generate read/write load with rados objects ranging from 1MB to 25MB +workload: + full_sequential: + - print: "**** done start rados_loadgenbig.yaml" + - workunit: + branch: pacific + clients: + client.0: + - rados/load-gen-big.sh + - print: "**** done end rados_loadgenbig.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/rbd_import_export.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/rbd_import_export.yaml new file mode 100644 index 000000000..20e74c176 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/rbd_import_export.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +workload: + full_sequential: + - print: "**** done start rbd_import_export.yaml" + - workunit: + branch: pacific + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done end rbd_import_export.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_api.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_api.yaml new file mode 100644 index 000000000..6a0242b91 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_api.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + librbd C and C++ api tests +workload: + full_sequential: + - print: "**** done start test_rbd_api.yaml" + - workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_api.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_python.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_python.yaml new file mode 100644 index 000000000..8d1f0fd0f --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_python.yaml @@ -0,0 +1,20 @@ +meta: +- desc: | + librbd python api tests +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest 
+workload: + full_sequential: + - print: "**** done start test_rbd_python.yaml" + - workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_python.yaml" + diff --git a/qa/suites/upgrade/pacific-x/stress-split/% b/qa/suites/upgrade/pacific-x/stress-split/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/% diff --git a/qa/suites/upgrade/pacific-x/stress-split/.qa b/qa/suites/upgrade/pacific-x/stress-split/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/0-distro b/qa/suites/upgrade/pacific-x/stress-split/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/0-roles.yaml b/qa/suites/upgrade/pacific-x/stress-split/0-roles.yaml new file mode 100644 index 000000000..ad3ee43d3 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/0-roles.yaml @@ -0,0 +1,31 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/pacific-x/stress-split/1-start.yaml b/qa/suites/upgrade/pacific-x/stress-split/1-start.yaml new file mode 100644 index 000000000..9a552df99 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/1-start.yaml @@ -0,0 +1,122 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume + +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:pacific + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" + +- cephadm.shell: + mon.a: + - ceph fs volume create foo + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + +- ceph.healthy: + +- print: "**** upgrading first half of cluster, with stress ****" +- parallel: + - first-half-tasks + - first-half-sequence +- print: "**** done upgrading first half of cluster ****" + +- ceph.healthy: + +- print: "**** applying stress + thrashing to mixed-version cluster ****" + +- parallel: + - stress-tasks + +- ceph.healthy: + +- print: "**** finishing upgrade ****" +- parallel: + - second-half-tasks + - second-half-sequence + +- ceph.healthy: + + +################# + +first-half-sequence: +- cephadm.shell: + env: [sha1] + mon.a: + - ceph config set mgr mgr/cephadm/daemon_cache_timeout 60 + - ceph config set global log_to_journald false --force + + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps + + - echo wait for minority of mons to upgrade + - while ! ceph mon versions | grep $sha1 ; do sleep 2 ; done + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for majority of mons to upgrade + - "while ! ceph mon versions | grep $sha1 | egrep ': [23]' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for all mons to upgrade + - "while ! ceph mon versions | grep $sha1 | grep ': 3' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for half of osds to upgrade + - "while ! 
ceph osd versions | grep $sha1 | egrep ': [45678]'; do sleep 2 ; done" + - ceph orch upgrade pause + - ceph orch ps + + - ceph orch ps + - ceph versions + + +################# + +stress-tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 0 + chance_thrash_pg_upmap_items: 0 + disable_objectstore_tool_tests: true + chance_force_recovery: 0 + aggressive_pg_num_changes: false + + +################# + +second-half-sequence: + sequential: + - cephadm.shell: + env: [sha1] + mon.a: + - ceph orch upgrade resume + - sleep 60 + + - echo wait for upgrade to complete + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + + - echo upgrade complete + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/.qa b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/radosbench.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/radosbench.yaml new file mode 100644 index 000000000..3816ca38c --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/radosbench.yaml @@ -0,0 +1,19 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +first-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-cls.yaml new file mode 100644 index 000000000..4ebc09310 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +first-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..6835c9125 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +first-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd_api.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd_api.yaml new file mode 100644 index 000000000..a7060c0ac --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +first-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/readwrite.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/readwrite.yaml new file mode 100644 index 000000000..21a9f379a --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +first-half-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..6447c2245 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations 
+first-half-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/+ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/+ diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/.qa b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/radosbench.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/radosbench.yaml new file mode 100644 index 000000000..9058bd804 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/radosbench.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +stress-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-cls.yaml new file mode 100644 index 000000000..e72875c14 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +stress-tasks: +- workunit: + branch: pacific + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-import-export.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..c3008f3b1 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +stress-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd_api.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd_api.yaml new file mode 100644 index 000000000..8b52658c4 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +stress-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/readwrite.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/readwrite.yaml new file mode 100644 index 000000000..41e34d6d7 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..f56d0de0f --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot 
operations +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/.qa b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/radosbench.yaml b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/radosbench.yaml new file mode 100644 index 000000000..7268cb170 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/radosbench.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +second-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..00cf88d54 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +second-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/mon_election b/qa/suites/upgrade/pacific-x/stress-split/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/.qa b/qa/suites/upgrade/quincy-x/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/% b/qa/suites/upgrade/quincy-x/filestore-remove-check/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/% diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml new file mode 100644 index 000000000..5caffc353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml @@ -0,0 +1,6 @@ +openstack: + - machine: + disk: 100 # GB + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml new file mode 100644 index 000000000..b4b6f4d90 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml @@ -0,0 +1,34 @@ +meta: +- desc: | + Run ceph on one node, + use xfs beneath the osds. The upgrade to reef + should fail to start the osds with filestore +overrides: + ceph: + mon_bind_msgr2: false + mon_bind_addrvec: false + mon-health-to-clog: false + wait-for-healthy: false + wait-for-osds-up: false + wait-for-scrub: false + skip_stop_pg_num_changes: true + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + - slow request + - \(MON_MSGR2_NOT_ENABLED\) + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + enable experimental unrecoverable data corrupting features: "*" + mon warn on msgr2 not enabled: false + mon: + mon warn on osd down out interval zero: false +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml new file mode 100644 index 000000000..471bd61df --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml @@ -0,0 +1,32 @@ +meta: +- desc: install ceph/quincy latest +tasks: +- install: + exclude_packages: + - ceph-mgr-cephadm + - cephadm + - libcephfs-dev + branch: quincy +- print: "**** done install quincy" +- ceph: + create_rbd_pool: false + conf: + global: + bluestore_warn_on_legacy_statfs: false + bluestore warn on no per pool omap: false + mon pg warn min per osd: 0 + mon: + mon_warn_on_insecure_global_id_reclaim: false + mon_warn_on_insecure_global_id_reclaim_allowed: false + log-ignorelist: + - Not found or unloadable + - evicting unresponsive client +- exec: + osd.0: + - ceph osd require-osd-release quincy +- print: "**** done ceph" +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml new file mode 100644 index 000000000..6aa429f18 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml @@ -0,0 +1,20 @@ +meta: +- desc: | + install upgrade ceph/-x on cluster + restart : mons, osd.* +tasks: +- install.upgrade: + mon.a: +- exec: + osd.0: + - ceph osd require-osd-release quincy +- print: "**** done install.upgrade of nodes" +- ceph.restart: + daemons: [mon.a,mgr.x,osd.0,osd.1,osd.2] + mon-health-to-clog: false + wait-for-healthy: false + wait-for-osds-up: false + wait-for-scrub: false + skip_stop_pg_num_changes: true + expected-failure: "FileStore has been deprecated and is no longer supported" +- print: "**** done ceph.restart of all mons and osds" diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml new file mode 100644 index 000000000..b6ef47b06 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd objectstore: filestore + osd sloppy crc: true + ceph-deploy: + fs: xfs + filestore: True + conf: + osd: + osd objectstore: filestore + osd sloppy crc: true
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml new file mode 100644 index 000000000..e1374c410 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml @@ -0,0 +1,9 @@ +os_type: ubuntu +os_version: "20.04" +# the normal ubuntu 20.04 kernel (5.4.0-88-generic currently) has a bug that prevents the nvme_loop +# driver from working properly. It appears to be this one: +# https://lkml.org/lkml/2020/9/21/1456 +# (at least, that is the symptom: nvme nvme1: Connect command failed, error wo/DNR bit: 880) +overrides: + kernel: + hwe: true
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/% b/qa/suites/upgrade/quincy-x/parallel/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/% diff --git a/qa/suites/upgrade/quincy-x/parallel/.qa b/qa/suites/upgrade/quincy-x/parallel/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/0-random-distro$ b/qa/suites/upgrade/quincy-x/parallel/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/0-start.yaml b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml new file mode 100644 index 000000000..e57e31f2f --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml @@ -0,0 +1,43 @@ +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing quincy cephadm ..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:quincy + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing quincy cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + +- print: "**** done start telemetry quincy..." +- workunit: + clients: + client.0: + - test_telemetry_quincy.sh +- print: "**** done end telemetry quincy..." + +- print: "**** done start parallel" +- parallel: + - workload + - upgrade-sequence +- print: "**** done end parallel" + +- print: "**** done start telemetry x..." +- workunit: + clients: + client.0: + - test_telemetry_quincy_x.sh +- print: "**** done end telemetry x..." diff --git a/qa/suites/upgrade/quincy-x/parallel/mon_election b/qa/suites/upgrade/quincy-x/parallel/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/upgrade-sequence.yaml b/qa/suites/upgrade/quincy-x/parallel/upgrade-sequence.yaml new file mode 100644 index 000000000..a3f0888da --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/upgrade-sequence.yaml @@ -0,0 +1,16 @@ +# renamed tasks: to upgrade-sequence: +upgrade-sequence: + sequential: + - print: "**** done start upgrade, wait" + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade, wait..." + diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/+ b/qa/suites/upgrade/quincy-x/parallel/workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/+ diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/.qa b/qa/suites/upgrade/quincy-x/parallel/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/.qa @@ -0,0 +1 @@ +../.qa/
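The upgrade-sequence above polls 'ceph orch upgrade status' until the upgrade finishes and then uses jq to assert that 'ceph versions' reports exactly one overall version whose name contains the target sha1. A small sketch of the same assertion done in Python over the JSON that 'ceph versions' prints; the sample data is made up but shaped like real output:

    import json

    def all_on_target(versions_json, sha1):
        # True when every daemon reports a single overall version containing sha1;
        # the Python analogue of:
        #   ceph versions | jq -e '.overall | length == 1'
        #   ceph versions | jq -e '.overall | keys' | grep $sha1
        overall = json.loads(versions_json)["overall"]
        return len(overall) == 1 and all(sha1 in version for version in overall)

    # Made-up sample shaped like `ceph versions` output:
    sample = json.dumps({
        "mon": {"ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 3},
        "osd": {"ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 8},
        "overall": {"ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 11},
    })
    assert all_on_target(sample, "abc123")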
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/ec-rados-default.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/ec-rados-default.yaml new file mode 100644 index 000000000..67a0f39c5 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/ec-rados-default.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run run randomized correctness test for rados operations + on an erasure-coded pool +workload: + full_sequential: + - print: "**** done start ec-rados-default.yaml" + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done end ec-rados-default.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml new file mode 100644 index 000000000..9c2ff9da1 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + object class functional tests +workload: + full_sequential: + - print: "**** done start rados_api.yaml" + - workunit: + branch: quincy + clients: + client.0: + - cls + - print: "**** done end rados_api.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rados_loadgenbig.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rados_loadgenbig.yaml new file mode 100644 index 000000000..25b1e1aaf --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/rados_loadgenbig.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + generate read/write load with rados objects ranging from 1MB to 25MB +workload: + full_sequential: + - print: "**** done start rados_loadgenbig.yaml" + - workunit: + branch: quincy + clients: + client.0: + - rados/load-gen-big.sh + - print: "**** done end rados_loadgenbig.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rbd_import_export.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rbd_import_export.yaml new file mode 100644 index 000000000..82b66048c --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/rbd_import_export.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +workload: + full_sequential: + - print: "**** done start rbd_import_export.yaml" + - workunit: + branch: quincy + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done end rbd_import_export.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_api.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_api.yaml new file mode 100644 index 000000000..c871d4c8c --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_api.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + librbd C and C++ api tests +workload: + full_sequential: + - print: "**** done start test_rbd_api.yaml" + - workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_api.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_python.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_python.yaml new file mode 100644 index 000000000..3ae98ed1e --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_python.yaml @@ -0,0 +1,20 @@ +meta: +- desc: | + librbd python api tests +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +workload: + 
full_sequential: + - print: "**** done start test_rbd_python.yaml" + - workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_python.yaml" + diff --git a/qa/suites/upgrade/quincy-x/stress-split/% b/qa/suites/upgrade/quincy-x/stress-split/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/% diff --git a/qa/suites/upgrade/quincy-x/stress-split/.qa b/qa/suites/upgrade/quincy-x/stress-split/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/0-distro b/qa/suites/upgrade/quincy-x/stress-split/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/0-roles.yaml b/qa/suites/upgrade/quincy-x/stress-split/0-roles.yaml new file mode 100644 index 000000000..ad3ee43d3 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/0-roles.yaml @@ -0,0 +1,31 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml new file mode 100644 index 000000000..b9bca65fb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml @@ -0,0 +1,122 @@ +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume + +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:quincy + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" + +- cephadm.shell: + mon.a: + - ceph fs volume create foo + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + +- ceph.healthy: + +- print: "**** upgrading first half of cluster, with stress ****" +- parallel: + - first-half-tasks + - first-half-sequence +- print: "**** done upgrading first half of cluster ****" + +- ceph.healthy: + +- print: "**** applying stress + thrashing to mixed-version cluster ****" + +- parallel: + - stress-tasks + +- ceph.healthy: + +- print: "**** finishing upgrade ****" +- parallel: + - second-half-tasks + - second-half-sequence + +- ceph.healthy: + + +################# + +first-half-sequence: +- cephadm.shell: + env: [sha1] + mon.a: + - ceph config set mgr mgr/cephadm/daemon_cache_timeout 60 + - ceph config set global log_to_journald false --force + + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps + + - echo wait for minority of mons to upgrade + - while ! ceph mon versions | grep $sha1 ; do sleep 2 ; done + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for majority of mons to upgrade + - "while ! ceph mon versions | grep $sha1 | egrep ': [23]' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for all mons to upgrade + - "while ! ceph mon versions | grep $sha1 | grep ': 3' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for half of osds to upgrade + - "while ! 
ceph osd versions | grep $sha1 | egrep ': [45678]'; do sleep 2 ; done" + - ceph orch upgrade pause + - ceph orch ps + + - ceph orch ps + - ceph versions + + +################# + +stress-tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 0 + chance_thrash_pg_upmap_items: 0 + disable_objectstore_tool_tests: true + chance_force_recovery: 0 + aggressive_pg_num_changes: false + + +################# + +second-half-sequence: + sequential: + - cephadm.shell: + env: [sha1] + mon.a: + - ceph orch upgrade resume + - sleep 60 + + - echo wait for upgrade to complete + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + + - echo upgrade complete + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/.qa b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
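The staged first-half-sequence above decides when to pause and resume by counting how many of the three mons already report the target build, for example `ceph mon versions | grep $sha1 | egrep ': [23]'` waits for a majority. A sketch of that counting logic in Python, assuming the JSON form of `ceph mon versions` maps version strings to daemon counts (function name and sample numbers are illustrative):

    import json

    def mons_on_version(mon_versions_json, sha1):
        # Count mons whose reported version string contains the target sha1.
        counts = json.loads(mon_versions_json)
        return sum(n for version, n in counts.items() if sha1 in version)

    # Made-up sample: two mons upgraded, one still on the previous build.
    sample = json.dumps({
        "ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 2,
        "ceph version 17.2.6 (1234567) quincy (stable)": 1,
    })
    assert mons_on_version(sample, "abc123") >= 2   # majority of 3 mons upgraded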
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/radosbench.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/radosbench.yaml new file mode 100644 index 000000000..3816ca38c --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/radosbench.yaml @@ -0,0 +1,19 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +first-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml new file mode 100644 index 000000000..b722f1873 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +first-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..206389055 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +first-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd_api.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd_api.yaml new file mode 100644 index 000000000..0fa5d0944 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +first-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/readwrite.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/readwrite.yaml new file mode 100644 index 000000000..21a9f379a --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +first-half-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..6447c2245 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +first-half-tasks: +- 
full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/+ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/+ diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/.qa b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/radosbench.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/radosbench.yaml new file mode 100644 index 000000000..9058bd804 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/radosbench.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +stress-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml new file mode 100644 index 000000000..649b024a4 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +stress-tasks: +- workunit: + branch: quincy + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-import-export.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..2267e4462 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +stress-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd_api.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd_api.yaml new file mode 100644 index 000000000..cc4f29a08 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +stress-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/readwrite.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/readwrite.yaml new file mode 100644 index 000000000..41e34d6d7 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..f56d0de0f --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +stress-tasks: +- 
full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/.qa b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/radosbench.yaml b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/radosbench.yaml new file mode 100644 index 000000000..7268cb170 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/radosbench.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +second-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..1c509f755 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +second-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/mon_election b/qa/suites/upgrade/quincy-x/stress-split/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/% b/qa/suites/upgrade/telemetry-upgrade/pacific-x/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/% diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/.qa b/qa/suites/upgrade/telemetry-upgrade/pacific-x/.qa new file mode 120000 index 000000000..a23f7e045 --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/.qa @@ -0,0 +1 @@ +../../.qa
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-random-distro$ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-start.yaml b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/1-tasks.yaml b/qa/suites/upgrade/telemetry-upgrade/pacific-x/1-tasks.yaml new file mode 100644 index 000000000..28b9eb29f --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/1-tasks.yaml @@ -0,0 +1,54 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing pacific cephadm ..." +- cephadm: + image: quay.io/ceph/daemon-base:latest-pacific + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing pacific cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + + +- print: "**** done start telemetry pacific..." +- workunit: + clients: + client.0: + - test_telemetry_pacific.sh +- print: "**** done end telemetry pacific..." + +- print: "**** done start upgrade sequence..." +- sequential: + - print: "**** done start upgrade..." + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade..." + + - print: "**** done start telemetry x..." + - workunit: + clients: + client.0: + - test_telemetry_pacific_x.sh + - print: "**** done end telemetry x..." +- print: "**** done end upgrade sequence..." diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/% b/qa/suites/upgrade/telemetry-upgrade/quincy-x/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/% diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/.qa b/qa/suites/upgrade/telemetry-upgrade/quincy-x/.qa new file mode 120000 index 000000000..a23f7e045 --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/.qa @@ -0,0 +1 @@ +../../.qa
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-random-distro$ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-start.yaml b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/1-tasks.yaml b/qa/suites/upgrade/telemetry-upgrade/quincy-x/1-tasks.yaml new file mode 100644 index 000000000..cd6609a6d --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/1-tasks.yaml @@ -0,0 +1,53 @@ +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing quincy cephadm ..." +- cephadm: + image: quay.io/ceph/daemon-base:latest-quincy + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing quincy cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + +- print: "**** done start telemetry quincy..." +- workunit: + clients: + client.0: + - test_telemetry_quincy.sh +- print: "**** done end telemetry quincy..." + +- print: "**** done start upgrade sequence..." +- sequential: + - print: "**** done start upgrade..." + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade..." + + - print: "**** done start telemetry x..." + - workunit: + clients: + client.0: + - test_telemetry_quincy_x.sh + - print: "**** done end telemetry x..." +- print: "**** done end upgrade sequence..." diff --git a/qa/suites/windows/.qa b/qa/suites/windows/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/% b/qa/suites/windows/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/windows/basic/% diff --git a/qa/suites/windows/basic/.qa b/qa/suites/windows/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/clusters/.qa b/qa/suites/windows/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/clusters/fixed-1.yaml b/qa/suites/windows/basic/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/windows/basic/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/windows/basic/install/.qa b/qa/suites/windows/basic/install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/install/install.yaml b/qa/suites/windows/basic/install/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/windows/basic/install/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/windows/basic/tasks/.qa b/qa/suites/windows/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/tasks/windows_tests.yaml b/qa/suites/windows/basic/tasks/windows_tests.yaml new file mode 100644 index 000000000..42469bf3b --- /dev/null +++ b/qa/suites/windows/basic/tasks/windows_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + client.0: + - windows/libvirt_vm/setup.sh + - windows/run-tests.sh diff --git a/qa/suites/windows/basic/ubuntu_latest.yaml b/qa/suites/windows/basic/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/windows/basic/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/tasks/__init__.py b/qa/tasks/__init__.py new file mode 100644 index 000000000..9a7949a00 --- /dev/null +++ b/qa/tasks/__init__.py @@ -0,0 +1,6 @@ +import logging + +# Inherit teuthology's log level +teuthology_log = logging.getLogger('teuthology') +log = logging.getLogger(__name__) +log.setLevel(teuthology_log.level) diff --git a/qa/tasks/admin_socket.py b/qa/tasks/admin_socket.py new file mode 100644 index 000000000..0d960d1a5 --- /dev/null +++ b/qa/tasks/admin_socket.py @@ -0,0 +1,204 @@ +""" +Admin Socket task -- used in rados, powercycle, and smoke testing +""" + +import json +import logging +import os +import time + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Run an admin socket command, make sure the output is json, and run + a test program on it. The test program should read json from + stdin. This task succeeds if the test program exits with status 0. + + To run the same test on all clients:: + + tasks: + - ceph: + - rados: + - admin_socket: + all: + dump_requests: + test: http://example.com/script + + To restrict it to certain clients:: + + tasks: + - ceph: + - rados: [client.1] + - admin_socket: + client.1: + dump_requests: + test: http://example.com/script + + If an admin socket command has arguments, they can be specified as + a list:: + + tasks: + - ceph: + - rados: [client.0] + - admin_socket: + client.0: + dump_requests: + test: http://example.com/script + help: + test: http://example.com/test_help_version + args: [version] + + Note that there must be a ceph client with an admin socket running + before this task is run. The tests are parallelized at the client + level. Tests for a single client are run serially. + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict), \ + 'admin_socket task requires a dict for configuration' + teuthology.replace_all_with_clients(ctx.cluster, config) + + with parallel() as ptask: + for client, tests in config.items(): + ptask.spawn(_run_tests, ctx, client, tests) + + +def _socket_command(ctx, remote, socket_path, command, args): + """ + Run an admin socket command and return the result as a string. + + :param ctx: Context + :param remote: Remote site + :param socket_path: path to socket + :param command: command to be run remotely + :param args: command arguments + + :returns: output of command in json format + """ + testdir = teuthology.get_testdir(ctx) + max_tries = 120 + sub_commands = [c.strip() for c in command.split('||')] + ex = None + for _ in range(max_tries): + for sub_command in sub_commands: + try: + out = remote.sh([ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph', + '--admin-daemon', socket_path, + ] + sub_command.split(' ') + args) + except CommandFailedError as e: + ex = e + log.info('ceph cli "%s" returned an error %s, ' + 'command not registered yet?', sub_command, e) + else: + log.debug('admin socket command %s returned %s', + sub_command, out) + return json.loads(out) + else: + # exhausted all commands + log.info('sleeping and retrying ...') + time.sleep(1) + else: + # i tried max_tries times.. 
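The admin_socket task docstring above notes that each configured `test` program receives the admin socket command's JSON output on stdin and must exit 0 for the task to pass. A minimal sketch of such a test program; the field it inspects is hypothetical:

    #!/usr/bin/env python3
    # Toy admin_socket test program: read the command's JSON from stdin,
    # inspect it, and exit 0 on success or 1 on failure.
    import json
    import sys

    def main():
        data = json.load(sys.stdin)
        # Hypothetical check: a `version` command should report a non-empty string.
        if isinstance(data, dict) and data.get("version"):
            return 0
        return 1

    if __name__ == "__main__":
        sys.exit(main())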
+ assert ex is not None + raise ex + + +def _run_tests(ctx, client, tests): + """ + Create a temp directory and wait for a client socket to be created. + For each test, copy the executable locally and run the test. + Remove temp directory when finished. + + :param ctx: Context + :param client: client machine to run the test + :param tests: list of tests to run + """ + testdir = teuthology.get_testdir(ctx) + log.debug('Running admin socket tests on %s', client) + (remote,) = ctx.cluster.only(client).remotes.keys() + socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client) + overrides = ctx.config.get('overrides', {}).get('admin_socket', {}) + + try: + tmp_dir = os.path.join( + testdir, + 'admin_socket_{client}'.format(client=client), + ) + remote.run( + args=[ + 'mkdir', + '--', + tmp_dir, + run.Raw('&&'), + # wait for client process to create the socket + 'while', 'test', '!', '-e', socket_path, run.Raw(';'), + 'do', 'sleep', '1', run.Raw(';'), 'done', + ], + ) + + for command, config in tests.items(): + if config is None: + config = {} + teuthology.deep_merge(config, overrides) + log.debug('Testing %s with config %s', command, str(config)) + + test_path = None + if 'test' in config: + # hack: the git_url is always ceph-ci or ceph + git_url = teuth_config.get_ceph_git_url() + repo_name = 'ceph.git' + if git_url.count('ceph-ci'): + repo_name = 'ceph-ci.git' + url = config['test'].format( + branch=config.get('branch', 'master'), + repo=repo_name, + ) + test_path = os.path.join(tmp_dir, command) + remote.run( + args=[ + 'wget', + '-q', + '-O', + test_path, + '--', + url, + run.Raw('&&'), + 'chmod', + 'u=rx', + '--', + test_path, + ], + ) + + args = config.get('args', []) + assert isinstance(args, list), \ + 'admin socket command args must be a list' + sock_out = _socket_command(ctx, remote, socket_path, command, args) + if test_path is not None: + remote.run( + args=[ + test_path, + ], + stdin=json.dumps(sock_out), + ) + + finally: + remote.run( + args=[ + 'rm', '-rf', '--', tmp_dir, + ], + ) diff --git a/qa/tasks/autotest.py b/qa/tasks/autotest.py new file mode 100644 index 000000000..80c3fc9d2 --- /dev/null +++ b/qa/tasks/autotest.py @@ -0,0 +1,165 @@ +""" +Run an autotest test on the ceph cluster. +""" +import json +import logging +import os + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run an autotest test on the ceph cluster. + + Only autotest client tests are supported. + + The config is a mapping from role name to list of tests to run on + that client. + + For example:: + + tasks: + - ceph: + - ceph-fuse: [client.0, client.1] + - autotest: + client.0: [dbench] + client.1: [bonnie] + + You can also specify a list of tests to run on all clients:: + + tasks: + - ceph: + - ceph-fuse: + - autotest: + all: [dbench] + """ + assert isinstance(config, dict) + config = teuthology.replace_all_with_clients(ctx.cluster, config) + log.info('Setting up autotest...') + testdir = teuthology.get_testdir(ctx) + with parallel() as p: + for role in config.keys(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_download, testdir, remote) + + log.info('Making a separate scratch dir for every client...') + for role in config.keys(): + assert isinstance(role, str) + PREFIX = 'client.' 
+ assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + remote.run( + args=[ + 'sudo', + 'install', + '-d', + '-m', '0755', + '--owner={user}'.format(user='ubuntu'), #TODO + '--', + scratch, + ], + ) + + with parallel() as p: + for role, tests in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_run_tests, testdir, remote, role, tests) + +def _download(testdir, remote): + """ + Download. Does not explicitly support muliple tasks in a single run. + """ + remote.run( + args=[ + # explicitly does not support multiple autotest tasks + # in a single run; the result archival would conflict + 'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'mkdir', '{tdir}/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'wget', + '-nv', + '--no-check-certificate', + 'https://github.com/ceph/autotest/tarball/ceph', + '-O-', + run.Raw('|'), + 'tar', + '-C', '{tdir}/autotest'.format(tdir=testdir), + '-x', + '-z', + '-f-', + '--strip-components=1', + ], + ) + +def _run_tests(testdir, remote, role, tests): + """ + Spawned to run test on remote site + """ + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + + assert isinstance(tests, list) + for idx, testname in enumerate(tests): + log.info('Running autotest client test #%d: %s...', idx, testname) + + tag = 'client.{id}.num{idx}.{testname}'.format( + idx=idx, + testname=testname, + id=id_, + ) + control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag) + remote.write_file( + path=control, + data='import json; data=json.loads({data!r}); job.run_test(**data)'.format( + data=json.dumps(dict( + url=testname, + dir=scratch, + # TODO perhaps tag + # results will be in {testdir}/autotest/client/results/dbench + # or {testdir}/autotest/client/results/dbench.{tag} + )), + ), + ) + remote.run( + args=[ + '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir), + '--verbose', + '--harness=simple', + '--tag={tag}'.format(tag=tag), + control, + run.Raw('3>&1'), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', '--', control, + ], + ) + + remote.run( + args=[ + 'mv', + '--', + '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag), + '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', '--', '{tdir}/autotest'.format(tdir=testdir), + ], + ) diff --git a/qa/tasks/aver.py b/qa/tasks/aver.py new file mode 100644 index 000000000..79ee18c5c --- /dev/null +++ b/qa/tasks/aver.py @@ -0,0 +1,67 @@ +""" +Aver wrapper task +""" +import contextlib +import logging +from subprocess import check_call, Popen, PIPE + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute an aver assertion + + Parameters: + + input: file containing data referred to by the assertions. 
File name is + relative to the job's archive path + validations: list of validations in the Aver language + + Example: + - aver: + input: bench_output.csv + validations: + - expect performance(alg='ceph') > performance(alg='raw') + - for size > 3 expect avg_throughput > 2000 + """ + log.info('Beginning aver...') + assert isinstance(config, dict), 'expecting dictionary for configuration' + + if 'input' not in config: + raise Exception("Expecting 'input' option") + if len(config.get('validations', [])) < 1: + raise Exception("Expecting at least one entry in 'validations'") + + url = ('https://github.com/ivotron/aver/releases/download/' + 'v0.3.0/aver-linux-amd64.tar.bz2') + + aver_path = ctx.archive + '/aver' + + # download binary + check_call(['wget', '-O', aver_path + '.tbz', url]) + check_call(['tar', 'xfj', aver_path + '.tbz', '-C', ctx.archive]) + + # print version + process = Popen([aver_path, '-v'], stdout=PIPE) + log.info(process.communicate()[0]) + + # validate + for validation in config['validations']: + cmd = (aver_path + ' -s -i ' + (ctx.archive + '/' + config['input']) + + ' "' + validation + '"') + log.info("executing: " + cmd) + process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) + (stdout, stderr) = process.communicate() + if stderr: + log.info('aver stderr: ' + stderr) + log.info('aver result: ' + stdout) + if stdout.strip(' \t\n\r') != 'true': + raise Exception('Failed validation: ' + validation) + + try: + yield + finally: + log.info('Removing aver binary...') + check_call(['rm', aver_path, aver_path + '.tbz']) diff --git a/qa/tasks/backfill_toofull.py b/qa/tasks/backfill_toofull.py new file mode 100644 index 000000000..f4ff90a46 --- /dev/null +++ b/qa/tasks/backfill_toofull.py @@ -0,0 +1,193 @@ +""" +Backfill_toofull +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def wait_for_pg_state(manager, pgid, state, to_osd): + log.debug("waiting for pg %s state is %s" % (pgid, state)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg); + assert pg + status = pg['state'].split('+') + if 'active' not in status: + log.debug('not active') + continue + if state not in status: + log.debug('not %s' % state) + continue + assert to_osd in pg['up'] + return + assert False, '%s not in %s' % (pgid, state) + + +def task(ctx, config): + """ + Test backfill reservation calculates "toofull" condition correctly. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'backfill_toofull task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'backfill_toofull') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + pg_num=1, + erasure_code_profile_name=profile_name, + min_size=2) + manager.raw_cluster_cmd('osd', 'pool', 'set', pool, + 'pg_autoscale_mode', 'off') + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + pool_id = manager.get_pool_num(pool) + pgid = '%d.0' % pool_id + pgs = manager.get_pg_stats() + acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) + log.debug("acting=%s" % acting) + assert acting + primary = acting[0] + target = acting[1] + + log.debug("write some data") + rados(ctx, mon, ['-p', pool, 'bench', '120', 'write', '--no-cleanup']) + df = manager.get_osd_df(target) + log.debug("target osd df: %s" % df) + + total_kb = df['kb'] + used_kb = df['kb_used'] + + log.debug("pause recovery") + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'nobackfill') + manager.raw_cluster_cmd('osd', 'set', 'norecover') + + log.debug("stop tartget osd %s" % target) + manager.kill_osd(target) + manager.wait_till_active() + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + + log.debug("re-write data") + rados(ctx, mon, ['-p', pool, 'cleanup']) + time.sleep(10) + rados(ctx, mon, ['-p', pool, 'bench', '60', 'write', '--no-cleanup']) + + df = manager.get_osd_df(primary) + log.debug("primary osd df: %s" % df) + + primary_used_kb = df['kb_used'] + + log.info("test backfill reservation rejected with toofull") + + # We set backfillfull ratio less than new data size and expect the pg + # entering backfill_toofull state. + # + # We also need to update nearfull ratio to prevent "full ratio(s) out of order". + + backfillfull = 0.9 * primary_used_kb / total_kb + nearfull = backfillfull * 0.9 + + log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % + (nearfull, backfillfull)) + manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', + '{:.3f}'.format(nearfull + 0.001)) + manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', + '{:.3f}'.format(backfillfull + 0.001)) + + log.debug("start tartget osd %s" % target) + + manager.revive_osd(target) + manager.wait_for_active() + manager.wait_till_osd_is_up(target) + + wait_for_pg_state(manager, pgid, 'backfill_toofull', target) + + log.info("test pg not enter backfill_toofull after restarting backfill") + + # We want to set backfillfull ratio to be big enough for the target to + # successfully backfill new data but smaller than the sum of old and new + # data, so if the osd backfill reservation incorrectly calculates "toofull" + # the test will detect this (fail). + # + # Note, we need to operate with "uncompressed" bytes because currently + # osd backfill reservation does not take compression into account. + # + # We also need to update nearfull ratio to prevent "full ratio(s) out of order". 
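The comments above pick backfillfull between the space the new data needs on its own and the space old plus new data would need, with nearfull just below it to keep the ratios ordered. A worked example of that arithmetic with made-up numbers (the real test derives them from osd and pool df output):

    # Made-up numbers; the test computes these from `ceph osd df` / pool df output.
    total_kb = 10_000_000         # capacity of the target osd
    used_kb = 1_200_000           # stale data still on the stopped target
    primary_used_kb = 1_500_000   # data re-written while the target was down
    compress_ratio = 1.0          # no compression in this example

    nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb    # 0.15
    backfillfull_max = (used_kb + primary_used_kb) * compress_ratio / total_kb  # 0.27
    delta = backfillfull_max - nearfull_min                                     # 0.12
    nearfull = nearfull_min + delta * 0.1       # 0.162
    backfillfull = nearfull_min + delta * 0.2   # 0.174: room for the new data,
                                                # but not for old + new together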
+ + pdf = manager.get_pool_df(pool) + log.debug("pool %s df: %s" % (pool, pdf)) + assert pdf + compress_ratio = 1.0 * pdf['compress_under_bytes'] / pdf['compress_bytes_used'] \ + if pdf['compress_bytes_used'] > 0 else 1.0 + log.debug("compress_ratio: %s" % compress_ratio) + + backfillfull = (used_kb + primary_used_kb) * compress_ratio / total_kb + assert backfillfull < 0.9 + nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb + assert nearfull_min < backfillfull + delta = backfillfull - nearfull_min + nearfull = nearfull_min + delta * 0.1 + backfillfull = nearfull_min + delta * 0.2 + + log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % + (nearfull, backfillfull)) + manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', + '{:.3f}'.format(nearfull + 0.001)) + manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', + '{:.3f}'.format(backfillfull + 0.001)) + + wait_for_pg_state(manager, pgid, 'backfilling', target) + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + + log.debug("interrupt %s backfill" % target) + manager.mark_down_osd(target) + # after marking the target osd down it will automatically be + # up soon again + + log.debug("resume recovery") + manager.raw_cluster_cmd('osd', 'unset', 'noout') + manager.raw_cluster_cmd('osd', 'unset', 'nobackfill') + manager.raw_cluster_cmd('osd', 'unset', 'norecover') + + # wait for everything to peer, backfill and recover + manager.wait_for_clean() + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') diff --git a/qa/tasks/barbican.py b/qa/tasks/barbican.py new file mode 100644 index 000000000..771304fba --- /dev/null +++ b/qa/tasks/barbican.py @@ -0,0 +1,524 @@ +""" +Deploy and configure Barbican for Teuthology +""" +import argparse +import contextlib +import logging +import http +import json +import time +import math + +from urllib.parse import urlparse + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Barbican from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. 
+ """ + assert isinstance(config, dict) + log.info('Downloading barbican...') + testdir = teuthology.get_testdir(ctx) + for (client, cconf) in config.items(): + branch = cconf.get('force-branch', 'master') + log.info("Using branch '%s' for barbican", branch) + + sha1 = cconf.get('sha1') + log.info('sha1=%s', sha1) + + ctx.cluster.only(client).run( + args=[ + 'bash', '-l' + ], + ) + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', branch, + 'https://github.com/openstack/barbican.git', + '{tdir}/barbican'.format(tdir=testdir), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/barbican'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing barbican...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/barbican'.format(tdir=testdir), + ], + ) + +def get_barbican_dir(ctx): + return '{tdir}/barbican'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_barbican_dir(ctx, client, args): + ctx.cluster.only(client).run( + args=['cd', get_barbican_dir(ctx), run.Raw('&&'), ] + args, + ) + +def run_in_barbican_venv(ctx, client, args): + run_in_barbican_dir(ctx, client, + ['.', + '.barbicanenv/bin/activate', + run.Raw('&&') + ] + args) + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Barbican using pip. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for barbican...') + for (client, _) in config.items(): + run_in_barbican_dir(ctx, client, + ['python3', '-m', 'venv', '.barbicanenv']) + run_in_barbican_venv(ctx, client, + ['pip', 'install', '--upgrade', 'pip']) + run_in_barbican_venv(ctx, client, + ['pip', 'install', 'pytz', + '-e', get_barbican_dir(ctx)]) + yield + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + +def set_authtoken_params(ctx, cclient, cconfig): + section_config_list = cconfig['keystone_authtoken'].items() + for config in section_config_list: + (name, val) = config + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'{} = {}'.format(name, val)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + + keystone_role = cconfig.get('use-keystone-role', None) + public_host, public_port = ctx.keystone.public_endpoints[keystone_role] + url = 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'auth_uri = {}'.format(url)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + admin_url = 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'auth_url = {}'.format(admin_url)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + +def fix_barbican_api_paste(ctx, cclient): + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', '-n', + '/\\[pipeline:barbican_api]/ {p;n; /^pipeline =/ '+ + '{ s/.*/pipeline = unauthenticated-context apiapp/;p;d } } ; p', + './etc/barbican/barbican-api-paste.ini']) + +def fix_barbican_api(ctx, cclient): + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + 
'/prop_dir =/ s#etc/barbican#{}/etc/barbican#'.format(get_barbican_dir(ctx)), + 'bin/barbican-api']) + +def create_barbican_conf(ctx, cclient): + barbican_host, barbican_port = ctx.barbican.endpoints[cclient] + barbican_url = 'http://{host}:{port}'.format(host=barbican_host, + port=barbican_port) + log.info("barbican url=%s", barbican_url) + + run_in_barbican_dir(ctx, cclient, + ['bash', '-c', + 'echo -n -e "[DEFAULT]\nhost_href=' + barbican_url + '\n" ' + \ + '>barbican.conf']) + + log.info("run barbican db upgrade") + config_path = get_barbican_dir(ctx) + '/barbican.conf' + run_in_barbican_venv(ctx, cclient, ['barbican-manage', '--config-file', config_path, + 'db', 'upgrade']) + log.info("run barbican db sync_secret_stores") + run_in_barbican_venv(ctx, cclient, ['barbican-manage', '--config-file', config_path, + 'db', 'sync_secret_stores']) + +@contextlib.contextmanager +def configure_barbican(ctx, config): + """ + Configure barbican paste-api and barbican-api. + """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + keystone_role = cconfig.get('use-keystone-role', None) + if keystone_role is None: + raise ConfigError('use-keystone-role not defined in barbican task') + + set_authtoken_params(ctx, cclient, cconfig) + fix_barbican_api(ctx, cclient) + fix_barbican_api_paste(ctx, cclient) + create_barbican_conf(ctx, cclient) + try: + yield + finally: + pass + +@contextlib.contextmanager +def run_barbican(ctx, config): + assert isinstance(config, dict) + log.info('Running barbican...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'barbican.public' + '.' + client_id + + run_cmd = ['cd', get_barbican_dir(ctx), run.Raw('&&'), + '.', '.barbicanenv/bin/activate', run.Raw('&&'), + 'HOME={}'.format(get_barbican_dir(ctx)), run.Raw('&&'), + 'bin/barbican-api', + run.Raw('& { read; kill %1; }')] + #run.Raw('1>/dev/null') + + run_cmd = 'cd ' + get_barbican_dir(ctx) + ' && ' + \ + '. .barbicanenv/bin/activate && ' + \ + 'HOME={}'.format(get_barbican_dir(ctx)) + ' && ' + \ + 'exec bin/barbican-api & { read; kill %1; }' + + ctx.daemons.add_daemon( + remote, 'barbican', client_public_with_id, + cluster=cluster_name, + args=['bash', '-c', run_cmd], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=get_barbican_dir(ctx), + wait=False, + check_status=False, + ) + + # sleep driven synchronization + run_in_barbican_venv(ctx, client, ['sleep', '15']) + try: + yield + finally: + log.info('Stopping Barbican instance') + ctx.daemons.get_daemon('barbican', client_public_with_id, + cluster_name).stop() + + +@contextlib.contextmanager +def create_secrets(ctx, config): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + rgw_user = cconfig['rgw_user'] + + keystone_role = cconfig.get('use-keystone-role', None) + keystone_host, keystone_port = ctx.keystone.public_endpoints[keystone_role] + barbican_host, barbican_port = ctx.barbican.endpoints[cclient] + barbican_url = 'http://{host}:{port}'.format(host=barbican_host, + port=barbican_port) + log.info("barbican_url=%s", barbican_url) + #fetching user_id of user that gets secrets for radosgw + token_req = http.client.HTTPConnection(keystone_host, keystone_port, timeout=30) + token_req.request( + 'POST', + '/v3/auth/tokens', + headers={'Content-Type':'application/json'}, + body=json.dumps({ + "auth": { + "identity": { + "methods": ["password"], + "password": { + "user": { + "domain": {"id": "default"}, + "name": rgw_user["username"], + "password": rgw_user["password"] + } + } + }, + "scope": { + "project": { + "domain": {"id": "default"}, + "name": rgw_user["tenantName"] + } + } + } + })) + rgw_access_user_resp = token_req.getresponse() + if not (rgw_access_user_resp.status >= 200 and + rgw_access_user_resp.status < 300): + raise Exception("Cannot authenticate user "+rgw_user["username"]+" for secret creation") + # baru_resp = json.loads(baru_req.data) + rgw_access_user_data = json.loads(rgw_access_user_resp.read().decode()) + rgw_user_id = rgw_access_user_data['token']['user']['id'] + if 'secrets' in cconfig: + for secret in cconfig['secrets']: + if 'name' not in secret: + raise ConfigError('barbican.secrets must have "name" field') + if 'base64' not in secret: + raise ConfigError('barbican.secrets must have "base64" field') + if 'tenantName' not in secret: + raise ConfigError('barbican.secrets must have "tenantName" field') + if 'username' not in secret: + raise ConfigError('barbican.secrets must have "username" field') + if 'password' not in secret: + raise ConfigError('barbican.secrets must have "password" field') + + token_req = http.client.HTTPConnection(keystone_host, keystone_port, timeout=30) + token_req.request( + 'POST', + '/v3/auth/tokens', + headers={'Content-Type':'application/json'}, + body=json.dumps({ + "auth": { + "identity": { + "methods": ["password"], + "password": { + "user": { + "domain": {"id": "default"}, + "name": secret["username"], + "password": secret["password"] + } + } + }, + "scope": { + "project": { + "domain": {"id": "default"}, + "name": secret["tenantName"] + } + } + } + })) + token_resp = token_req.getresponse() + if not (token_resp.status >= 200 and + token_resp.status < 300): + raise Exception("Cannot authenticate user "+secret["username"]+" for secret creation") + + expire = time.time() + 5400 # now + 90m + (expire_fract,dummy) = math.modf(expire) + expire_format = "%%FT%%T.%06d" % (round(expire_fract*1000000)) + expiration = time.strftime(expire_format, time.gmtime(expire)) + token_id = token_resp.getheader('x-subject-token') + + key1_json = json.dumps( + { + "name": secret['name'], + "expiration": expiration, + "algorithm": "aes", + "bit_length": 256, + "mode": "cbc", + "payload": secret['base64'], + "payload_content_type": "application/octet-stream", + "payload_content_encoding": "base64" + }) + + sec_req = http.client.HTTPConnection(barbican_host, barbican_port, timeout=30) + try: + sec_req.request( + 'POST', + '/v1/secrets', + headers={'Content-Type': 'application/json', + 'Accept': '*/*', + 'X-Auth-Token': token_id}, + body=key1_json + ) + except: + log.info("catched exception!") + run_in_barbican_venv(ctx, cclient, 
['sleep', '900']) + + barbican_sec_resp = sec_req.getresponse() + if not (barbican_sec_resp.status >= 200 and + barbican_sec_resp.status < 300): + raise Exception("Cannot create secret") + barbican_data = json.loads(barbican_sec_resp.read().decode()) + if 'secret_ref' not in barbican_data: + raise ValueError("Malformed secret creation response") + secret_ref = barbican_data["secret_ref"] + log.info("secret_ref=%s", secret_ref) + secret_url_parsed = urlparse(secret_ref) + acl_json = json.dumps( + { + "read": { + "users": [rgw_user_id], + "project-access": True + } + }) + acl_req = http.client.HTTPConnection(secret_url_parsed.netloc, timeout=30) + acl_req.request( + 'PUT', + secret_url_parsed.path+'/acl', + headers={'Content-Type': 'application/json', + 'Accept': '*/*', + 'X-Auth-Token': token_id}, + body=acl_json + ) + barbican_acl_resp = acl_req.getresponse() + if not (barbican_acl_resp.status >= 200 and + barbican_acl_resp.status < 300): + raise Exception("Cannot set ACL for secret") + + key = {'id': secret_ref.split('secrets/')[1], 'payload': secret['base64']} + ctx.barbican.keys[secret['name']] = key + + run_in_barbican_venv(ctx, cclient, ['sleep', '3']) + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure Barbican + + Example of configuration: + + tasks: + - local_cluster: + cluster_path: /home/adam/ceph-1/build + - local_rgw: + - tox: [ client.0 ] + - keystone: + client.0: + sha1: 17.0.0.0rc2 + force-branch: master + projects: + - name: rgwcrypt + description: Encryption Tenant + - name: barbican + description: Barbican + - name: s3 + description: S3 project + users: + - name: rgwcrypt-user + password: rgwcrypt-pass + project: rgwcrypt + - name: barbican-user + password: barbican-pass + project: barbican + - name: s3-user + password: s3-pass + project: s3 + roles: [ name: Member, name: creator ] + role-mappings: + - name: Member + user: rgwcrypt-user + project: rgwcrypt + - name: admin + user: barbican-user + project: barbican + - name: creator + user: s3-user + project: s3 + services: + - name: keystone + type: identity + description: Keystone Identity Service + - barbican: + client.0: + force-branch: master + use-keystone-role: client.0 + keystone_authtoken: + auth_plugin: password + username: barbican-user + password: barbican-pass + user_domain_name: Default + rgw_user: + tenantName: rgwcrypt + username: rgwcrypt-user + password: rgwcrypt-pass + secrets: + - name: my-key-1 + base64: a2V5MS5GcWVxKzhzTGNLaGtzQkg5NGVpb1FKcFpGb2c= + tenantName: s3 + username: s3-user + password: s3-pass + - name: my-key-2 + base64: a2V5Mi5yNUNNMGFzMVdIUVZxcCt5NGVmVGlQQ1k4YWg= + tenantName: s3 + username: s3-user + password: s3-pass + - s3tests: + client.0: + force-branch: master + kms_key: my-key-1 + - rgw: + client.0: + use-keystone-role: client.0 + use-barbican-role: client.0 + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task barbican only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. 
+ for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('barbican', {})) + + log.debug('Barbican config is %s', config) + + if not hasattr(ctx, 'keystone'): + raise ConfigError('barbican must run after the keystone task') + + + ctx.barbican = argparse.Namespace() + ctx.barbican.endpoints = assign_ports(ctx, config, 9311) + ctx.barbican.keys = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_barbican(ctx=ctx, config=config), + lambda: run_barbican(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/blktrace.py b/qa/tasks/blktrace.py new file mode 100644 index 000000000..10b1da0c0 --- /dev/null +++ b/qa/tasks/blktrace.py @@ -0,0 +1,96 @@ +""" +Run the blktrace program through teuthology +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) +blktrace = '/usr/sbin/blktrace' +daemon_signal = 'term' + +@contextlib.contextmanager +def setup(ctx, config): + """ + Set up all the remotes + """ + osds = ctx.cluster.only(teuthology.is_type('osd', config['cluster'])) + log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx)) + + for remote, roles_for_host in osds.remotes.items(): + log.info('Creating %s on %s' % (log_dir, remote.name)) + remote.run( + args=['mkdir', '-p', '-m0755', '--', log_dir], + wait=False, + ) + yield + +@contextlib.contextmanager +def execute(ctx, config): + """ + Run the blktrace program on remote machines. + """ + procs = [] + testdir = teuthology.get_testdir(ctx) + log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir) + + osds = ctx.cluster.only(teuthology.is_type('osd')) + for remote, roles_for_host in osds.remotes.items(): + roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote] + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', + config['cluster']): + if roles_to_devs.get(role): + dev = roles_to_devs[role] + log.info("running blktrace on %s: %s" % (remote.name, dev)) + + proc = remote.run( + args=[ + 'cd', + log_dir, + run.Raw(';'), + 'daemon-helper', + daemon_signal, + 'sudo', + blktrace, + '-o', + dev.rsplit("/", 1)[1], + '-d', + dev, + ], + wait=False, + stdin=run.PIPE, + ) + procs.append(proc) + try: + yield + finally: + osds = ctx.cluster.only(teuthology.is_type('osd')) + log.info('stopping blktrace processes') + for proc in procs: + proc.stdin.close() + +@contextlib.contextmanager +def task(ctx, config): + """ + Usage: + blktrace: + + or: + blktrace: + cluster: backup + + Runs blktrace on all osds in the specified cluster (the 'ceph' cluster by + default). 
+ """ + if config is None: + config = {} + config['cluster'] = config.get('cluster', 'ceph') + + with contextutil.nested( + lambda: setup(ctx=ctx, config=config), + lambda: execute(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/boto.cfg.template b/qa/tasks/boto.cfg.template new file mode 100644 index 000000000..cdfe8873b --- /dev/null +++ b/qa/tasks/boto.cfg.template @@ -0,0 +1,2 @@ +[Boto] +http_socket_timeout = {idle_timeout} diff --git a/qa/tasks/cbt.py b/qa/tasks/cbt.py new file mode 100644 index 000000000..56c57138b --- /dev/null +++ b/qa/tasks/cbt.py @@ -0,0 +1,293 @@ +import logging +import os +import yaml + +from teuthology import misc +from teuthology.orchestra import run +from teuthology.task import Task + +log = logging.getLogger(__name__) + + +class CBT(Task): + """ + Passes through a CBT configuration yaml fragment. + """ + def __init__(self, ctx, config): + super(CBT, self).__init__(ctx, config) + self.log = log + + def hosts_of_type(self, type_): + return [r.name for r in self.ctx.cluster.only(misc.is_type(type_)).remotes.keys()] + + def generate_cbt_config(self): + mon_hosts = self.hosts_of_type('mon') + osd_hosts = self.hosts_of_type('osd') + client_hosts = self.hosts_of_type('client') + rgw_client = {} + rgw_client[client_hosts[0]] = None + rgw_hosts = self.config.get('cluster', {}).get('rgws', rgw_client) + cluster_config = dict( + user=self.config.get('cluster', {}).get('user', 'ubuntu'), + head=mon_hosts[0], + osds=osd_hosts, + mons=mon_hosts, + clients=client_hosts, + rgws=rgw_hosts, + osds_per_node=self.config.get('cluster', {}).get('osds_per_node', 1), + rebuild_every_test=False, + use_existing=True, + is_teuthology=self.config.get('cluster', {}).get('is_teuthology', True), + iterations=self.config.get('cluster', {}).get('iterations', 1), + tmp_dir='/tmp/cbt', + pool_profiles=self.config.get('cluster', {}).get('pool_profiles'), + ) + + benchmark_config = self.config.get('benchmarks') + benchmark_type = next(iter(benchmark_config.keys())) + if benchmark_type in ['librbdfio', 'fio']: + testdir = misc.get_testdir(self.ctx) + benchmark_config[benchmark_type]['cmd_path'] = os.path.join(testdir, 'fio/fio') + if benchmark_type == 'cosbench': + # create cosbench_dir and cosbench_xml_dir + testdir = misc.get_testdir(self.ctx) + benchmark_config['cosbench']['cosbench_dir'] = os.path.join(testdir, 'cos') + benchmark_config['cosbench']['cosbench_xml_dir'] = os.path.join(testdir, 'xml') + self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', benchmark_config['cosbench']['cosbench_xml_dir']]) + benchmark_config['cosbench']['controller'] = osd_hosts[0] + + # set auth details + remotes_and_roles = self.ctx.cluster.remotes.items() + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + benchmark_config['cosbench']['auth'] = "username=cosbench:operator;password=intel2012;url=http://%s:80/auth/v1.0;retry=9" %(ips[0]) + client_endpoints_config = self.config.get('client_endpoints', None) + + return dict( + cluster=cluster_config, + benchmarks=benchmark_config, + client_endpoints = client_endpoints_config, + ) + + def install_dependencies(self): + system_type = misc.get_system_type(self.first_mon) + + if system_type == 'rpm': + install_cmd = ['sudo', 'yum', '-y', 'install'] + cbt_depends = ['python3-yaml', 'python3-lxml', 'librbd-devel', 'pdsh', 'collectl'] + else: + install_cmd = ['sudo', 'apt-get', '-y', '--force-yes', 'install'] + cbt_depends = ['python3-yaml', 'python3-lxml', 'librbd-dev', 
'collectl'] + self.first_mon.run(args=install_cmd + cbt_depends) + + benchmark_type = next(iter(self.cbt_config.get('benchmarks').keys())) + self.log.info('benchmark: %s', benchmark_type) + + if benchmark_type in ['librbdfio', 'fio']: + # install fio + testdir = misc.get_testdir(self.ctx) + self.first_mon.run( + args=[ + 'git', 'clone', '-b', 'master', + 'https://github.com/axboe/fio.git', + '{tdir}/fio'.format(tdir=testdir) + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'fio'), run.Raw('&&'), + './configure', run.Raw('&&'), + 'make' + ] + ) + + if benchmark_type == 'cosbench': + # install cosbench + self.log.info('install dependencies for cosbench') + if system_type == 'rpm': + cosbench_depends = ['wget', 'unzip', 'java-1.7.0-openjdk', 'curl'] + else: + cosbench_depends = ['wget', 'unzip', 'openjdk-8-jre', 'curl'] + self.first_mon.run(args=install_cmd + cosbench_depends) + testdir = misc.get_testdir(self.ctx) + cosbench_version = '0.4.2.c3' + cosbench_location = 'https://github.com/intel-cloud/cosbench/releases/download/v0.4.2.c3/0.4.2.c3.zip' + os_version = misc.get_system_type(self.first_mon, False, True) + + # additional requirements for bionic + if os_version == '18.04': + self.first_mon.run( + args=['sudo', 'apt-get', '-y', 'purge', 'openjdk-11*']) + # use our own version of cosbench + cosbench_version = 'cosbench-0.4.2.c3.1' + # contains additional parameter "-N" to nc + cosbench_location = 'http://drop.ceph.com/qa/cosbench-0.4.2.c3.1.zip' + cosbench_dir = os.path.join(testdir, cosbench_version) + self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', cosbench_dir]) + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'wget', + cosbench_location, run.Raw('&&'), + 'unzip', '{name}.zip'.format(name=cosbench_version), '-d', cosbench_version + ] + ) + else: + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'wget', + cosbench_location, run.Raw('&&'), + 'unzip', '{name}.zip'.format(name=cosbench_version) + ] + ) + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'ln', '-s', cosbench_version, 'cos', + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'cos'), run.Raw('&&'), + 'chmod', '+x', run.Raw('*.sh'), + ] + ) + + # start cosbench and check info + self.log.info('start cosbench') + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'start-all.sh' + ] + ) + self.log.info('check cosbench info') + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'cli.sh', 'info' + ] + ) + + def checkout_cbt(self): + testdir = misc.get_testdir(self.ctx) + repo = self.config.get('repo', 'https://github.com/ceph/cbt.git') + branch = self.config.get('branch', 'master') + branch = self.config.get('force-branch', branch) + sha1 = self.config.get('sha1') + if sha1 is None: + self.first_mon.run( + args=[ + 'git', 'clone', '--depth', '1', '-b', branch, repo, + '{tdir}/cbt'.format(tdir=testdir) + ] + ) + else: + self.first_mon.run( + args=[ + 'git', 'clone', '-b', branch, repo, + '{tdir}/cbt'.format(tdir=testdir) + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'cbt'), run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ] + ) + + def setup(self): + super(CBT, self).setup() + self.first_mon = next(iter(self.ctx.cluster.only(misc.get_first_mon(self.ctx, self.config)).remotes.keys())) + self.cbt_config = self.generate_cbt_config() + self.log.info('cbt configuration is %s', self.cbt_config) + self.cbt_dir = 
os.path.join(misc.get_archive_dir(self.ctx), 'cbt') + self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', self.cbt_dir]) + self.first_mon.write_file( + os.path.join(self.cbt_dir, 'cbt_config.yaml'), + yaml.safe_dump(self.cbt_config, default_flow_style=False)) + self.checkout_cbt() + self.install_dependencies() + + def begin(self): + super(CBT, self).begin() + testdir = misc.get_testdir(self.ctx) + self.first_mon.run( + args=[ + '{tdir}/cbt/cbt.py'.format(tdir=testdir), + '-a', self.cbt_dir, + '{cbtdir}/cbt_config.yaml'.format(cbtdir=self.cbt_dir), + ], + ) + preserve_file = os.path.join(self.ctx.archive, '.preserve') + open(preserve_file, 'a').close() + + def end(self): + super(CBT, self).end() + testdir = misc.get_testdir(self.ctx) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/cbt'.format(tdir=testdir), + ] + ) + benchmark_type = next(iter(self.cbt_config.get('benchmarks').keys())) + if benchmark_type in ['librbdfio', 'fio']: + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/fio'.format(tdir=testdir), + ] + ) + + if benchmark_type == 'cosbench': + os_version = misc.get_system_type(self.first_mon, False, True) + if os_version == '18.04': + cosbench_version = 'cosbench-0.4.2.c3.1' + else: + cosbench_version = '0.4.2.c3' + # note: stop-all requires 'nc' + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'stop-all.sh', + run.Raw('||'), 'true' + ] + ) + self.first_mon.run( + args=[ + 'sudo', 'killall', '-9', 'java', + run.Raw('||'), 'true' + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/cos'.format(tdir=testdir), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/{version}'.format(tdir=testdir, version=cosbench_version), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/{version}.zip'.format(tdir=testdir, version=cosbench_version), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/xml'.format(tdir=testdir), + ] + ) + + +task = CBT diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template new file mode 100644 index 000000000..a9cce2953 --- /dev/null +++ b/qa/tasks/ceph.conf.template @@ -0,0 +1,113 @@ +[global] + chdir = "" + pid file = /var/run/ceph/$cluster-$name.pid + auth supported = cephx + + filestore xattr use omap = true + + mon clock drift allowed = 1.000 + + osd crush chooseleaf type = 0 + auth debug = true + + ms die on old message = true + ms die on bug = true + + mon max pg per osd = 10000 # >= luminous + mon pg warn max object skew = 0 + + # disable pg_autoscaler by default for new pools + osd_pool_default_pg_autoscale_mode = off + + osd pool default size = 2 + + mon osd allow primary affinity = true + mon osd allow pg remap = true + mon warn on legacy crush tunables = false + mon warn on crush straw calc version zero = false + mon warn on no sortbitwise = false + mon warn on osd down out interval zero = false + mon warn on too few osds = false + mon_warn_on_pool_pg_num_not_power_of_two = false + mon_warn_on_pool_no_redundancy = false + mon_allow_pool_size_one = true + + osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd" + + osd default data pool replay window = 5 + + mon allow pool delete = true + + mon cluster log file level = debug + debug asserts on shutdown = true + mon health detail to clog = false + +[osd] + osd 
journal size = 100 + + osd scrub load threshold = 5.0 + osd scrub max interval = 600 + osd mclock profile = high_recovery_ops + + osd recover clone overlap = true + osd recovery max chunk = 1048576 + + osd debug shutdown = true + osd debug op order = true + osd debug verify stray on activate = true + + osd open classes on start = true + osd debug pg log writeout = true + + osd deep scrub update digest min age = 30 + + osd map max advance = 10 + + journal zero on create = true + + filestore ondisk finisher threads = 3 + filestore apply finisher threads = 3 + + bdev debug aio = true + osd debug misdirected ops = true + +[mgr] + debug ms = 1 + debug mgr = 20 + debug mon = 20 + debug auth = 20 + mon reweight min pgs per osd = 4 + mon reweight min bytes per osd = 10 + mgr/telemetry/nag = false + +[mon] + debug ms = 1 + debug mon = 20 + debug paxos = 20 + debug auth = 20 + mon data avail warn = 5 + mon mgr mkfs grace = 240 + mon reweight min pgs per osd = 4 + mon osd reporter subtree level = osd + mon osd prime pg temp = true + mon reweight min bytes per osd = 10 + + # rotate auth tickets quickly to exercise renewal paths + auth mon ticket ttl = 660 # 11m + auth service ticket ttl = 240 # 4m + + # don't complain about insecure global_id in the test suite + mon_warn_on_insecure_global_id_reclaim = false + mon_warn_on_insecure_global_id_reclaim_allowed = false + + # 1m isn't quite enough + mon_down_mkfs_grace = 2m + + mon_warn_on_filestore_osds = false + +[client] + rgw cache enabled = true + rgw enable ops log = true + rgw enable usage log = true + log file = /var/log/ceph/$cluster-$name.$pid.log + admin socket = /var/run/ceph/$cluster-$name.$pid.asok diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py new file mode 100644 index 000000000..105362d48 --- /dev/null +++ b/qa/tasks/ceph.py @@ -0,0 +1,1960 @@ +""" +Ceph cluster task. + +Handle the setup, starting, and clean-up of a Ceph cluster. +""" +from copy import deepcopy +from io import BytesIO +from io import StringIO + +import argparse +import configobj +import contextlib +import errno +import logging +import os +import json +import time +import gevent +import re +import socket +import yaml + +from paramiko import SSHException +from tasks.ceph_manager import CephManager, write_conf, get_valgrind_args +from tarfile import ReadError +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology import exceptions +from teuthology.orchestra import run +from tasks import ceph_client as cclient +from teuthology.orchestra.daemon import DaemonGroup +from tasks.daemonwatchdog import DaemonWatchdog + +CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw'] +DATA_PATH = '/var/lib/ceph/{type_}/{cluster}-{id_}' + +log = logging.getLogger(__name__) + + +def generate_caps(type_): + """ + Each call will return the next capability for each system type + (essentially a subset of possible role values). Valid types are osd, + mds and client. 
+ """ + defaults = dict( + osd=dict( + mon='allow profile osd', + mgr='allow profile osd', + osd='allow *', + ), + mgr=dict( + mon='allow profile mgr', + osd='allow *', + mds='allow *', + ), + mds=dict( + mon='allow *', + mgr='allow *', + osd='allow *', + mds='allow', + ), + client=dict( + mon='allow rw', + mgr='allow r', + osd='allow rwx', + mds='allow', + ), + ) + for subsystem, capability in defaults[type_].items(): + yield '--cap' + yield subsystem + yield capability + + +def update_archive_setting(ctx, key, value): + """ + Add logs directory to job's info log file + """ + if ctx.archive is None: + return + with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file: + info_yaml = yaml.safe_load(info_file) + info_file.seek(0) + if 'archive' in info_yaml: + info_yaml['archive'][key] = value + else: + info_yaml['archive'] = {key: value} + yaml.safe_dump(info_yaml, info_file, default_flow_style=False) + + +@contextlib.contextmanager +def ceph_crash(ctx, config): + """ + Gather crash dumps from /var/lib/ceph/crash + """ + + # Add crash directory to job's archive + update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash') + + try: + yield + + finally: + if ctx.archive is not None: + log.info('Archiving crash dumps...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, '/var/lib/ceph/crash', + os.path.join(sub, 'crash')) + except ReadError: + pass + + +@contextlib.contextmanager +def ceph_log(ctx, config): + """ + Create /var/log/ceph log directory that is open to everyone. + Add valgrind and profiling-logger directories. + + :param ctx: Context + :param config: Configuration + """ + log.info('Making ceph log dir writeable by non-root...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'chmod', + '777', + '/var/log/ceph', + ], + wait=False, + ) + ) + log.info('Disabling ceph logrotate...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'rm', '-f', '--', + '/etc/logrotate.d/ceph', + ], + wait=False, + ) + ) + log.info('Creating extra log directories...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'install', '-d', '-m0777', '--', + '/var/log/ceph/valgrind', + '/var/log/ceph/profiling-logger', + ], + wait=False, + ) + ) + + # Add logs directory to job's info log file + update_archive_setting(ctx, 'log', '/var/log/ceph') + + class Rotater(object): + stop_event = gevent.event.Event() + + def invoke_logrotate(self): + # 1) install ceph-test.conf in /etc/logrotate.d + # 2) continuously loop over logrotate invocation with ceph-test.conf + while not self.stop_event.is_set(): + self.stop_event.wait(timeout=30) + try: + procs = ctx.cluster.run( + args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'], + wait=False, + stderr=StringIO() + ) + run.wait(procs) + except exceptions.ConnectionLostError as e: + # Some tests may power off nodes during test, in which + # case we will see connection errors that we should ignore. + log.debug("Missed logrotate, node '{0}' is offline".format( + e.node)) + except EOFError: + # Paramiko sometimes raises this when it fails to + # connect to a node during open_session. As with + # ConnectionLostError, we ignore this because nodes + # are allowed to get power cycled during tests. 
+ log.debug("Missed logrotate, EOFError") + except SSHException: + log.debug("Missed logrotate, SSHException") + except run.CommandFailedError as e: + for p in procs: + if p.finished and p.exitstatus != 0: + err = p.stderr.getvalue() + if 'error: error renaming temp state file' in err: + log.info('ignoring transient state error: %s', e) + else: + raise + except socket.error as e: + if e.errno in (errno.EHOSTUNREACH, errno.ECONNRESET): + log.debug("Missed logrotate, host unreachable") + else: + raise + + def begin(self): + self.thread = gevent.spawn(self.invoke_logrotate) + + def end(self): + self.stop_event.set() + self.thread.get() + + def write_rotate_conf(ctx, daemons): + testdir = teuthology.get_testdir(ctx) + remote_logrotate_conf = '%s/logrotate.ceph-test.conf' % testdir + rotate_conf_path = os.path.join(os.path.dirname(__file__), 'logrotate.conf') + with open(rotate_conf_path) as f: + conf = "" + for daemon, size in daemons.items(): + log.info('writing logrotate stanza for {}'.format(daemon)) + conf += f.read().format(daemon_type=daemon, + max_size=size) + f.seek(0, 0) + + for remote in ctx.cluster.remotes.keys(): + remote.write_file(remote_logrotate_conf, BytesIO(conf.encode())) + remote.sh( + f'sudo mv {remote_logrotate_conf} /etc/logrotate.d/ceph-test.conf && ' + 'sudo chmod 0644 /etc/logrotate.d/ceph-test.conf && ' + 'sudo chown root.root /etc/logrotate.d/ceph-test.conf') + remote.chcon('/etc/logrotate.d/ceph-test.conf', + 'system_u:object_r:etc_t:s0') + + if ctx.config.get('log-rotate'): + daemons = ctx.config.get('log-rotate') + log.info('Setting up log rotation with ' + str(daemons)) + write_rotate_conf(ctx, daemons) + logrotater = Rotater() + logrotater.begin() + try: + yield + + finally: + if ctx.config.get('log-rotate'): + log.info('Shutting down logrotate') + logrotater.end() + ctx.cluster.sh('sudo rm /etc/logrotate.d/ceph-test.conf') + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + teuthology.pull_directory(remote, '/var/log/ceph', + os.path.join(sub, 'log')) + + +def assign_devs(roles, devs): + """ + Create a dictionary of devs indexed by roles + + :param roles: List of roles + :param devs: Corresponding list of devices. + :returns: Dictionary of devs indexed by roles. + """ + return dict(zip(roles, devs)) + + +@contextlib.contextmanager +def valgrind_post(ctx, config): + """ + After the tests run, look through all the valgrind logs. Exceptions are raised + if textual errors occurred in the logs, or if valgrind exceptions were detected in + the logs. 
+ + :param ctx: Context + :param config: Configuration + """ + try: + yield + finally: + lookup_procs = list() + log.info('Checking for errors in any valgrind logs...') + for remote in ctx.cluster.remotes.keys(): + # look at valgrind logs for each node + proc = remote.run( + args="sudo zgrep '<kind>' /var/log/ceph/valgrind/* " + # include a second file so that we always get + # a filename prefix on the output + "/dev/null | sort | uniq", + wait=False, + check_status=False, + stdout=StringIO(), + ) + lookup_procs.append((proc, remote)) + + valgrind_exception = None + for (proc, remote) in lookup_procs: + proc.wait() + out = proc.stdout.getvalue() + for line in out.split('\n'): + if line == '': + continue + try: + (file, kind) = line.split(':') + except Exception: + log.error('failed to split line %s', line) + raise + log.debug('file %s kind %s', file, kind) + if (file.find('mds') >= 0) and kind.find('Lost') > 0: + continue + log.error('saw valgrind issue %s in %s', kind, file) + valgrind_exception = Exception('saw valgrind issues') + + if config.get('expect_valgrind_errors'): + if not valgrind_exception: + raise Exception('expected valgrind issues and found none') + else: + if valgrind_exception: + raise valgrind_exception + + +@contextlib.contextmanager +def crush_setup(ctx, config): + cluster_name = config['cluster'] + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + + profile = config.get('crush_tunables', 'default') + log.info('Setting crush tunables to %s', profile) + mon_remote.run( + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'crush', 'tunables', profile]) + yield + + +@contextlib.contextmanager +def check_enable_crimson(ctx, config): + # enable crimson-osds if crimson + log.info("check_enable_crimson: {}".format(is_crimson(config))) + if is_crimson(config): + cluster_name = config['cluster'] + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + log.info('check_enable_crimson: setting set-allow-crimson') + mon_remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'set-allow-crimson', '--yes-i-really-mean-it' + ] + ) + yield + + +@contextlib.contextmanager +def setup_manager(ctx, config): + first_mon = teuthology.get_first_mon(ctx, config, config['cluster']) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[config['cluster']] = CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager.' 
+ config['cluster']), + cluster=config['cluster'], + ) + yield + +@contextlib.contextmanager +def create_rbd_pool(ctx, config): + cluster_name = config['cluster'] + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + log.info('Waiting for OSDs to come up') + teuthology.wait_until_osds_up( + ctx, + cluster=ctx.cluster, + remote=mon_remote, + ceph_cluster=cluster_name, + ) + if config.get('create_rbd_pool', True): + log.info('Creating RBD pool') + mon_remote.run( + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'create', 'rbd', '8']) + mon_remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ], + check_status=False) + yield + +@contextlib.contextmanager +def cephfs_setup(ctx, config): + cluster_name = config['cluster'] + + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name)) + # If there are any MDSs, then create a filesystem for them to use + # Do this last because requires mon cluster to be up and running + if mdss.remotes: + log.info('Setting up CephFS filesystem(s)...') + cephfs_config = config.get('cephfs', {}) + fs_configs = cephfs_config.pop('fs', [{'name': 'cephfs'}]) + + # wait for standbys to become available (slow due to valgrind, perhaps) + mdsc = MDSCluster(ctx) + mds_count = len(list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))) + with contextutil.safe_while(sleep=2,tries=150) as proceed: + while proceed(): + if len(mdsc.get_standby_daemons()) >= mds_count: + break + + fss = [] + for fs_config in fs_configs: + assert isinstance(fs_config, dict) + name = fs_config.pop('name') + temp = deepcopy(cephfs_config) + teuthology.deep_merge(temp, fs_config) + subvols = config.get('subvols', None) + if subvols: + teuthology.deep_merge(temp, {'subvols': subvols}) + fs = Filesystem(ctx, fs_config=temp, name=name, create=True) + fss.append(fs) + + yield + + for fs in fss: + fs.destroy() + else: + yield + +@contextlib.contextmanager +def watchdog_setup(ctx, config): + ctx.ceph[config['cluster']].thrashers = [] + ctx.ceph[config['cluster']].watchdog = DaemonWatchdog(ctx, config, ctx.ceph[config['cluster']].thrashers) + ctx.ceph[config['cluster']].watchdog.start() + yield + +def get_mons(roles, ips, cluster_name, + mon_bind_msgr2=False, + mon_bind_addrvec=False): + """ + Get monitors and their associated addresses + """ + mons = {} + v1_ports = {} + v2_ports = {} + is_mon = teuthology.is_type('mon', cluster_name) + for idx, roles in enumerate(roles): + for role in roles: + if not is_mon(role): + continue + if ips[idx] not in v1_ports: + v1_ports[ips[idx]] = 6789 + else: + v1_ports[ips[idx]] += 1 + if mon_bind_msgr2: + if ips[idx] not in v2_ports: + v2_ports[ips[idx]] = 3300 + addr = '{ip}'.format(ip=ips[idx]) + else: + assert mon_bind_addrvec + v2_ports[ips[idx]] += 1 + addr = '[v2:{ip}:{port2},v1:{ip}:{port1}]'.format( + ip=ips[idx], + port2=v2_ports[ips[idx]], + port1=v1_ports[ips[idx]], + ) + elif mon_bind_addrvec: + addr = '[v1:{ip}:{port}]'.format( + ip=ips[idx], + port=v1_ports[ips[idx]], + ) + else: + addr = '{ip}:{port}'.format( + ip=ips[idx], + port=v1_ports[ips[idx]], + ) + mons[role] = addr + assert mons + return mons + +def skeleton_config(ctx, roles, ips, mons, cluster='ceph'): + """ + Returns a ConfigObj that is prefilled with a skeleton config. 
+ + Use conf[section][key]=value or conf.merge to change it. + + Use conf.write to write it out, override .filename first if you want. + """ + path = os.path.join(os.path.dirname(__file__), 'ceph.conf.template') + conf = configobj.ConfigObj(path, file_error=True) + mon_hosts = [] + for role, addr in mons.items(): + mon_cluster, _, _ = teuthology.split_role(role) + if mon_cluster != cluster: + continue + name = teuthology.ceph_role(role) + conf.setdefault(name, {}) + mon_hosts.append(addr) + conf.setdefault('global', {}) + conf['global']['mon host'] = ','.join(mon_hosts) + # set up standby mds's + is_mds = teuthology.is_type('mds', cluster) + for roles_subset in roles: + for role in roles_subset: + if is_mds(role): + name = teuthology.ceph_role(role) + conf.setdefault(name, {}) + return conf + +def create_simple_monmap(ctx, remote, conf, mons, + path=None, + mon_bind_addrvec=False): + """ + Writes a simple monmap based on current ceph.conf into path, or + <testdir>/monmap by default. + + Assumes ceph_conf is up to date. + + Assumes mon sections are named "mon.*", with the dot. + + :return the FSID (as a string) of the newly created monmap + """ + + addresses = list(mons.items()) + assert addresses, "There are no monitors in config!" + log.debug('Ceph mon addresses: %s', addresses) + + try: + log.debug('writing out conf {c}'.format(c=conf)) + except: + log.debug('my conf logging attempt failed') + testdir = teuthology.get_testdir(ctx) + tmp_conf_path = '{tdir}/ceph.tmp.conf'.format(tdir=testdir) + conf_fp = BytesIO() + conf.write(conf_fp) + conf_fp.seek(0) + teuthology.write_file(remote, tmp_conf_path, conf_fp) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'monmaptool', + '-c', + '{conf}'.format(conf=tmp_conf_path), + '--create', + '--clobber', + ] + if mon_bind_addrvec: + args.extend(['--enable-all-features']) + for (role, addr) in addresses: + _, _, n = teuthology.split_role(role) + if mon_bind_addrvec and (',' in addr or 'v' in addr or ':' in addr): + args.extend(('--addv', n, addr)) + else: + args.extend(('--add', n, addr)) + if not path: + path = '{tdir}/monmap'.format(tdir=testdir) + args.extend([ + '--print', + path + ]) + + monmap_output = remote.sh(args) + fsid = re.search("generated fsid (.+)$", + monmap_output, re.MULTILINE).group(1) + teuthology.delete_file(remote, tmp_conf_path) + return fsid + + +def is_crimson(config): + return config.get('flavor', 'default') == 'crimson' + + +def maybe_redirect_stderr(config, type_, args, log_path): + if type_ == 'osd' and is_crimson(config): + # teuthworker uses ubuntu:ubuntu to access the test nodes + create_log_cmd = \ + f'sudo install -b -o ubuntu -g ubuntu /dev/null {log_path}' + return create_log_cmd, args + [run.Raw('2>>'), log_path] + else: + return None, args + + +@contextlib.contextmanager +def cluster(ctx, config): + """ + Handle the creation and removal of a ceph cluster. + + On startup: + Create directories needed for the cluster. + Create remote journals for all osds. + Create and set keyring. + Copy the monmap to the test systems. + Setup mon nodes. + Setup mds nodes. + Mkfs osd nodes. + Add keyring information to monmaps + Mkfs mon nodes. + + On exit: + If errors occurred, extract a failure message and store in ctx.summary. + Unmount all test files and temporary journaling files. + Save the monitor information and archive all ceph logs. + Cleanup the keyring setup, and remove all monitor map and data files left over. 
+ + :param ctx: Context + :param config: Configuration + """ + if ctx.config.get('use_existing_cluster', False) is True: + log.info("'use_existing_cluster' is true; skipping cluster creation") + yield + + testdir = teuthology.get_testdir(ctx) + cluster_name = config['cluster'] + data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir, cluster=cluster_name) + log.info('Creating ceph cluster %s...', cluster_name) + log.info('config %s', config) + log.info('ctx.config %s', ctx.config) + run.wait( + ctx.cluster.run( + args=[ + 'install', '-d', '-m0755', '--', + data_dir, + ], + wait=False, + ) + ) + + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'install', '-d', '-m0777', '--', '/var/run/ceph', + ], + wait=False, + ) + ) + + devs_to_clean = {} + remote_to_roles_to_devs = {} + osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name)) + for remote, roles_for_host in osds.remotes.items(): + devs = teuthology.get_scratch_devices(remote) + roles_to_devs = assign_devs( + teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), devs + ) + devs_to_clean[remote] = [] + log.info('osd dev map: {}'.format(roles_to_devs)) + assert roles_to_devs, \ + "remote {} has osd roles, but no osd devices were specified!".format(remote.hostname) + remote_to_roles_to_devs[remote] = roles_to_devs + log.info("remote_to_roles_to_devs: {}".format(remote_to_roles_to_devs)) + for osd_role, dev_name in remote_to_roles_to_devs.items(): + assert dev_name, "{} has no associated device!".format(osd_role) + + log.info('Generating config...') + remotes_and_roles = ctx.cluster.remotes.items() + roles = [role_list for (remote, role_list) in remotes_and_roles] + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + mons = get_mons( + roles, ips, cluster_name, + mon_bind_msgr2=config.get('mon_bind_msgr2'), + mon_bind_addrvec=config.get('mon_bind_addrvec'), + ) + conf = skeleton_config( + ctx, roles=roles, ips=ips, mons=mons, cluster=cluster_name, + ) + for section, keys in config['conf'].items(): + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + if not hasattr(ctx, 'ceph'): + ctx.ceph = {} + ctx.ceph[cluster_name] = argparse.Namespace() + ctx.ceph[cluster_name].conf = conf + ctx.ceph[cluster_name].mons = mons + + default_keyring = '/etc/ceph/{cluster}.keyring'.format(cluster=cluster_name) + keyring_path = config.get('keyring_path', default_keyring) + + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + + log.info('Setting up %s...' 
% firstmon) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + keyring_path, + ], + ) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--gen-key', + '--name=mon.', + keyring_path, + ], + ) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'chmod', + '0644', + keyring_path, + ], + ) + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir, + cluster=cluster_name) + fsid = create_simple_monmap( + ctx, + remote=mon0_remote, + conf=conf, + mons=mons, + path=monmap_path, + mon_bind_addrvec=config.get('mon_bind_addrvec'), + ) + ctx.ceph[cluster_name].fsid = fsid + if not 'global' in conf: + conf['global'] = {} + conf['global']['fsid'] = fsid + + default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name) + conf_path = config.get('conf_path', default_conf_path) + log.info('Writing %s for FSID %s...' % (conf_path, fsid)) + write_conf(ctx, conf_path, cluster_name) + + log.info('Creating admin key on %s...' % firstmon) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--gen-key', + '--name=client.admin', + '--cap', 'mon', 'allow *', + '--cap', 'osd', 'allow *', + '--cap', 'mds', 'allow *', + '--cap', 'mgr', 'allow *', + keyring_path, + ], + ) + + log.info('Copying monmap to all nodes...') + keyring = mon0_remote.read_file(keyring_path) + monmap = mon0_remote.read_file(monmap_path) + + for rem in ctx.cluster.remotes.keys(): + # copy mon key and initial monmap + log.info('Sending monmap to node {remote}'.format(remote=rem)) + rem.write_file(keyring_path, keyring, mode='0644', sudo=True) + rem.write_file(monmap_path, monmap) + + log.info('Setting up mon nodes...') + mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name)) + + if not config.get('skip_mgr_daemons', False): + log.info('Setting up mgr nodes...') + mgrs = ctx.cluster.only(teuthology.is_type('mgr', cluster_name)) + for remote, roles_for_host in mgrs.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mgr', + cluster_name): + _, _, id_ = teuthology.split_role(role) + mgr_dir = DATA_PATH.format( + type_='mgr', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mgr_dir, + run.Raw('&&'), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + '--name=mgr.{id}'.format(id=id_), + mgr_dir + '/keyring', + ], + ) + + log.info('Setting up mds nodes...') + mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name)) + for remote, roles_for_host in mdss.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds', + cluster_name): + _, _, id_ = teuthology.split_role(role) + mds_dir = DATA_PATH.format( + type_='mds', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mds_dir, + run.Raw('&&'), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + '--name=mds.{id}'.format(id=id_), + mds_dir + '/keyring', + ], + ) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mds_dir + ]) + + cclient.create_keyring(ctx, cluster_name) + log.info('Running mkfs on osd nodes...') + + if not hasattr(ctx, 'disk_config'): + ctx.disk_config = argparse.Namespace() 
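+ # ctx.disk_config caches the per-remote osd-role-to-device mapping (and, below, the mount options and fstype chosen for each device); later tasks such as blktrace read ctx.disk_config.remote_to_roles_to_dev to find the block device backing each osd role.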
+ if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'): + ctx.disk_config.remote_to_roles_to_dev = {} + if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'): + ctx.disk_config.remote_to_roles_to_dev_mount_options = {} + if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'): + ctx.disk_config.remote_to_roles_to_dev_fstype = {} + + teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs) + + log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) + + for remote, roles_for_host in osds.remotes.items(): + roles_to_devs = remote_to_roles_to_devs[remote] + + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) + mnt_point = DATA_PATH.format( + type_='osd', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mnt_point, + ]) + log.info('roles_to_devs: {}'.format(roles_to_devs)) + log.info('role: {}'.format(role)) + if roles_to_devs.get(role): + dev = roles_to_devs[role] + fs = config.get('fs') + package = None + mkfs_options = config.get('mkfs_options') + mount_options = config.get('mount_options') + if fs == 'btrfs': + # package = 'btrfs-tools' + if mount_options is None: + mount_options = ['noatime', 'user_subvol_rm_allowed'] + if mkfs_options is None: + mkfs_options = ['-m', 'single', + '-l', '32768', + '-n', '32768'] + if fs == 'xfs': + # package = 'xfsprogs' + if mount_options is None: + mount_options = ['noatime'] + if mkfs_options is None: + mkfs_options = ['-f', '-i', 'size=2048'] + if fs == 'ext4' or fs == 'ext3': + if mount_options is None: + mount_options = ['noatime', 'user_xattr'] + + if mount_options is None: + mount_options = [] + if mkfs_options is None: + mkfs_options = [] + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + if package is not None: + remote.sh('sudo apt-get install -y %s' % package) + + try: + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + except run.CommandFailedError: + # Newer btfs-tools doesn't prompt for overwrite, use -f + if '-f' not in mount_options: + mkfs_options.append('-f') + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + + log.info('mount %s on %s -o %s' % (dev, remote, + ','.join(mount_options))) + remote.run( + args=[ + 'sudo', + 'mount', + '-t', fs, + '-o', ','.join(mount_options), + dev, + mnt_point, + ] + ) + remote.run( + args=[ + 'sudo', '/sbin/restorecon', mnt_point, + ], + check_status=False, + ) + if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options + if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: + ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs + devs_to_clean[remote].append(mnt_point) + + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) + try: + args = ['sudo', + 'MALLOC_CHECK_=3', + 'adjust-ulimits', + 'ceph-coverage', coverage_dir, + 'ceph-osd', + '--no-mon-config', + '--cluster', cluster_name, + '--mkfs', + '--mkkey', + '-i', id_, + '--monmap', monmap_path] + log_path = 
f'/var/log/ceph/{cluster_name}-osd.{id_}.log' + create_log_cmd, args = \ + maybe_redirect_stderr(config, 'osd', args, log_path) + if create_log_cmd: + remote.sh(create_log_cmd) + remote.run(args=args) + except run.CommandFailedError: + # try without --no-mon-config.. this may be an upgrade test + remote.run( + args=[ + 'sudo', + 'MALLOC_CHECK_=3', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-osd', + '--cluster', + cluster_name, + '--mkfs', + '--mkkey', + '-i', id_, + '--monmap', monmap_path, + ], + ) + mnt_point = DATA_PATH.format( + type_='osd', cluster=cluster_name, id_=id_) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point + ]) + + log.info('Reading keys from all nodes...') + keys_fp = BytesIO() + keys = [] + for remote, roles_for_host in ctx.cluster.remotes.items(): + for type_ in ['mgr', 'mds', 'osd']: + if type_ == 'mgr' and config.get('skip_mgr_daemons', False): + continue + for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name): + _, _, id_ = teuthology.split_role(role) + data = remote.read_file( + os.path.join( + DATA_PATH.format( + type_=type_, id_=id_, cluster=cluster_name), + 'keyring', + ), + sudo=True, + ) + keys.append((type_, id_, data)) + keys_fp.write(data) + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', cluster_name): + _, _, id_ = teuthology.split_role(role) + data = remote.read_file( + '/etc/ceph/{cluster}.client.{id}.keyring'.format(id=id_, cluster=cluster_name) + ) + keys.append(('client', id_, data)) + keys_fp.write(data) + + log.info('Adding keys to all mons...') + writes = mons.run( + args=[ + 'sudo', 'tee', '-a', + keyring_path, + ], + stdin=run.PIPE, + wait=False, + stdout=BytesIO(), + ) + keys_fp.seek(0) + teuthology.feed_many_stdins_and_close(keys_fp, writes) + run.wait(writes) + for type_, id_, data in keys: + run.wait( + mons.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + keyring_path, + '--name={type}.{id}'.format( + type=type_, + id=id_, + ), + ] + list(generate_caps(type_)), + wait=False, + ), + ) + + log.info('Running mkfs on mon nodes...') + for remote, roles_for_host in mons.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon', cluster_name): + _, _, id_ = teuthology.split_role(role) + mnt_point = DATA_PATH.format( + type_='mon', id_=id_, cluster=cluster_name) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mnt_point, + ], + ) + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-mon', + '--cluster', cluster_name, + '--mkfs', + '-i', id_, + '--monmap', monmap_path, + '--keyring', keyring_path, + ], + ) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point + ]) + + run.wait( + mons.run( + args=[ + 'rm', + '--', + monmap_path, + ], + wait=False, + ), + ) + + try: + yield + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + finally: + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + + log.info('Checking cluster log for badness...') + + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. 
+ :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name), + ] + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + stdout = mon0_remote.sh(args) + return stdout or None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config['log_ignorelist']) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log_ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + for remote, dirs in devs_to_clean.items(): + for dir_ in dirs: + log.info('Unmounting %s on %s' % (dir_, remote)) + try: + remote.run( + args=[ + 'sync', + run.Raw('&&'), + 'sudo', + 'umount', + '-f', + dir_ + ] + ) + except Exception as e: + remote.run(args=[ + 'sudo', + run.Raw('PATH=/usr/sbin:$PATH'), + 'lsof', + run.Raw(';'), + 'ps', 'auxf', + ]) + raise e + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + + # archive mon data, too + log.info('Archiving mon data...') + path = os.path.join(ctx.archive, 'data') + try: + os.makedirs(path) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + for remote, roles in mons.remotes.items(): + for role in roles: + is_mon = teuthology.is_type('mon', cluster_name) + if is_mon(role): + _, _, id_ = teuthology.split_role(role) + mon_dir = DATA_PATH.format( + type_='mon', id_=id_, cluster=cluster_name) + teuthology.pull_directory_tarball( + remote, + mon_dir, + path + '/' + role + '.tgz') + + log.info('Cleaning ceph cluster...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'rm', + '-rf', + '--', + conf_path, + keyring_path, + data_dir, + monmap_path, + run.Raw('{tdir}/../*.pid'.format(tdir=testdir)), + ], + wait=False, + ), + ) + + +def osd_scrub_pgs(ctx, config): + """ + Scrub pgs when we exit. + + First make sure all pgs are active and clean. + Next scrub all osds. + Then periodically check until all pgs have scrub time stamps that + indicate the last scrub completed. Time out if no progress is made + here after two minutes. 
+ """ + retries = 40 + delays = 20 + cluster_name = config['cluster'] + manager = ctx.managers[cluster_name] + for _ in range(retries): + stats = manager.get_pg_stats() + unclean = [stat['pgid'] for stat in stats if 'active+clean' not in stat['state']] + split_merge = [] + osd_dump = manager.get_osd_dump_json() + try: + split_merge = [i['pool_name'] for i in osd_dump['pools'] if i['pg_num'] != i['pg_num_target']] + except KeyError: + # we don't support pg_num_target before nautilus + pass + if not unclean and not split_merge: + break + waiting_on = [] + if unclean: + waiting_on.append(f'{unclean} to go clean') + if split_merge: + waiting_on.append(f'{split_merge} to split/merge') + waiting_on = ' and '.join(waiting_on) + log.info('Waiting for all PGs to be active+clean and split+merged, waiting on %s', waiting_on) + time.sleep(delays) + else: + raise RuntimeError("Scrubbing terminated -- not all pgs were active and clean.") + check_time_now = time.localtime() + time.sleep(1) + all_roles = teuthology.all_roles(ctx.cluster) + for role in teuthology.cluster_roles_of_type(all_roles, 'osd', cluster_name): + log.info("Scrubbing {osd}".format(osd=role)) + _, _, id_ = teuthology.split_role(role) + # allow this to fail; in certain cases the OSD might not be up + # at this point. we will catch all pgs below. + try: + manager.raw_cluster_cmd('tell', 'osd.' + id_, 'config', 'set', + 'osd_debug_deep_scrub_sleep', '0'); + manager.raw_cluster_cmd('osd', 'deep-scrub', id_) + except run.CommandFailedError: + pass + prev_good = 0 + gap_cnt = 0 + loop = True + while loop: + stats = manager.get_pg_stats() + timez = [(stat['pgid'],stat['last_scrub_stamp']) for stat in stats] + loop = False + thiscnt = 0 + re_scrub = [] + for (pgid, tmval) in timez: + t = tmval[0:tmval.find('.')].replace(' ', 'T') + pgtm = time.strptime(t, '%Y-%m-%dT%H:%M:%S') + if pgtm > check_time_now: + thiscnt += 1 + else: + log.info('pgid %s last_scrub_stamp %s %s <= %s', pgid, tmval, pgtm, check_time_now) + loop = True + re_scrub.append(pgid) + if thiscnt > prev_good: + prev_good = thiscnt + gap_cnt = 0 + else: + gap_cnt += 1 + if gap_cnt % 6 == 0: + for pgid in re_scrub: + # re-request scrub every so often in case the earlier + # request was missed. do not do it every time because + # the scrub may be in progress or not reported yet and + # we will starve progress. + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) + if gap_cnt > retries: + raise RuntimeError('Exiting scrub checking -- not all pgs scrubbed.') + if loop: + log.info('Still waiting for all pgs to be scrubbed.') + time.sleep(delays) + + +@contextlib.contextmanager +def run_daemon(ctx, config, type_): + """ + Run daemons for a role type. Handle the startup and termination of a a daemon. + On startup -- set coverages, cpu_profile, valgrind values for all remotes, + and a max_mds value for one mds. + On cleanup -- Stop all existing daemons of this type. + + :param ctx: Context + :param config: Configuration + :param type_: Role type + """ + cluster_name = config['cluster'] + log.info('Starting %s daemons in cluster %s...', type_, cluster_name) + testdir = teuthology.get_testdir(ctx) + daemons = ctx.cluster.only(teuthology.is_type(type_, cluster_name)) + + # check whether any daemons if this type are configured + if daemons is None: + return + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + + daemon_signal = 'kill' + if config.get('coverage') or config.get('valgrind') is not None: + daemon_signal = 'term' + + # create osds in order. 
(this only matters for pre-luminous, which might + # be jewel/hammer, which doesn't take an id_ argument to legacy 'osd create'). + osd_uuids = {} + for remote, roles_for_host in daemons.remotes.items(): + is_type_ = teuthology.is_type(type_, cluster_name) + for role in roles_for_host: + if not is_type_(role): + continue + _, _, id_ = teuthology.split_role(role) + + + if type_ == 'osd': + datadir='/var/lib/ceph/osd/{cluster}-{id}'.format( + cluster=cluster_name, id=id_) + osd_uuid = remote.read_file( + datadir + '/fsid', sudo=True).decode().strip() + osd_uuids[id_] = osd_uuid + for osd_id in range(len(osd_uuids)): + id_ = str(osd_id) + osd_uuid = osd_uuids.get(id_) + try: + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'new', osd_uuid, id_, + ] + ) + except: + # fallback to pre-luminous (jewel) + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'create', osd_uuid, + ] + ) + if config.get('add_osds_to_crush'): + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'crush', 'create-or-move', 'osd.' + id_, + '1.0', 'host=localhost', 'root=default', + ] + ) + + for remote, roles_for_host in daemons.remotes.items(): + is_type_ = teuthology.is_type(type_, cluster_name) + for role in roles_for_host: + if not is_type_(role): + continue + _, _, id_ = teuthology.split_role(role) + + run_cmd = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'daemon-helper', + daemon_signal, + ] + run_cmd_tail = [ + 'ceph-%s' % (type_), + '-f', + '--cluster', cluster_name, + '-i', id_] + + if type_ in config.get('cpu_profile', []): + profile_path = '/var/log/ceph/profiling-logger/%s.prof' % (role) + run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path]) + + vc = config.get('valgrind') + if vc is not None: + valgrind_args = None + if type_ in vc: + valgrind_args = vc[type_] + if role in vc: + valgrind_args = vc[role] + exit_on_first_error = vc.get('exit_on_first_error', True) + run_cmd = get_valgrind_args(testdir, role, run_cmd, valgrind_args, + exit_on_first_error=exit_on_first_error) + + run_cmd.extend(run_cmd_tail) + log_path = f'/var/log/ceph/{cluster_name}-{type_}.{id_}.log' + create_log_cmd, run_cmd = \ + maybe_redirect_stderr(config, type_, run_cmd, log_path) + if create_log_cmd: + remote.sh(create_log_cmd) + # always register mgr; don't necessarily start + ctx.daemons.register_daemon( + remote, type_, id_, + cluster=cluster_name, + args=run_cmd, + logger=log.getChild(role), + stdin=run.PIPE, + wait=False + ) + if type_ != 'mgr' or not config.get('skip_mgr_daemons', False): + role = cluster_name + '.' + type_ + ctx.daemons.get_daemon(type_, id_, cluster_name).restart() + + # kludge: run any pre-manager commands + if type_ == 'mon': + for cmd in config.get('pre-mgr-commands', []): + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + (remote,) = ctx.cluster.only(firstmon).remotes.keys() + remote.run(args=cmd.split(' ')) + + try: + yield + finally: + teuthology.stop_daemons_of_type(ctx, type_, cluster_name) + + +def healthy(ctx, config): + """ + Wait for all osd's to be up, and for the ceph health monitor to return HEALTH_OK. 
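+ In order, this waits for a mgr to become available, for all osds to come
+ up, for pgs to go active+clean, for the cluster to report HEALTH_OK
+ (unless 'wait-for-healthy' is false), and for the mds daemons of any
+ existing filesystems; failures in the mgr and pg-stat steps are tolerated
+ to keep upgrade tests working.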
+ + :param ctx: Context + :param config: Configuration + """ + config = config if isinstance(config, dict) else dict() + cluster_name = config.get('cluster', 'ceph') + log.info('Waiting until %s daemons up and pgs clean...', cluster_name) + manager = ctx.managers[cluster_name] + try: + manager.wait_for_mgr_available(timeout=30) + except (run.CommandFailedError, AssertionError) as e: + log.info('ignoring mgr wait error, probably testing upgrade: %s', e) + + manager.wait_for_all_osds_up(timeout=300) + + try: + manager.flush_all_pg_stats() + except (run.CommandFailedError, Exception) as e: + log.info('ignoring flush pg stats error, probably testing upgrade: %s', e) + manager.wait_for_clean() + + if config.get('wait-for-healthy', True): + log.info('Waiting until ceph cluster %s is healthy...', cluster_name) + manager.wait_until_healthy(timeout=300) + + if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes: + # Some MDSs exist, wait for them to be healthy + for fs in Filesystem.get_all_fs(ctx): + fs.wait_for_daemons(timeout=300) + +def wait_for_mon_quorum(ctx, config): + """ + Check renote ceph status until all monitors are up. + + :param ctx: Context + :param config: Configuration + """ + if isinstance(config, dict): + mons = config['daemons'] + cluster_name = config.get('cluster', 'ceph') + else: + assert isinstance(config, list) + mons = config + cluster_name = 'ceph' + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + (remote,) = ctx.cluster.only(firstmon).remotes.keys() + with contextutil.safe_while(sleep=10, tries=60, + action='wait for monitor quorum') as proceed: + while proceed(): + quorum_status = remote.sh('sudo ceph quorum_status', + logger=log.getChild('quorum_status')) + j = json.loads(quorum_status) + q = j.get('quorum_names', []) + log.debug('Quorum: %s', q) + if sorted(q) == sorted(mons): + break + + +def created_pool(ctx, config): + """ + Add new pools to the dictionary of pools that the ceph-manager + knows about. + """ + for new_pool in config: + if new_pool not in ctx.managers['ceph'].pools: + ctx.managers['ceph'].pools[new_pool] = ctx.managers['ceph'].get_pool_int_property( + new_pool, 'pg_num') + + +@contextlib.contextmanager +def suppress_mon_health_to_clog(ctx, config): + """ + set the option, and then restore it with its original value + + Note, due to the way how tasks are executed/nested, it's not suggested to + use this method as a standalone task. otherwise, it's likely that it will + restore the tweaked option at the /end/ of 'tasks' block. 
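+
+ The option is only suppressed when the caller's config carries the literal
+ string 'false' (quoted in YAML so it is not parsed as a boolean). A
+ hypothetical restart fragment that would trigger it::
+
+ tasks:
+ - ceph.restart:
+ daemons: [osd.0, mon.1]
+ mon-health-to-clog: "false"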
+ """ + if config.get('mon-health-to-clog', 'true') == 'false': + cluster = config.get('cluster', 'ceph') + manager = ctx.managers[cluster] + manager.raw_cluster_command( + 'config', 'set', 'mon', 'mon_health_to_clog', 'false' + ) + yield + manager.raw_cluster_command( + 'config', 'rm', 'mon', 'mon_health_to_clog' + ) + else: + yield + +@contextlib.contextmanager +def restart(ctx, config): + """ + restart ceph daemons + + For example:: + tasks: + - ceph.restart: [all] + + For example:: + tasks: + - ceph.restart: [osd.0, mon.1, mds.*] + + or:: + + tasks: + - ceph.restart: + daemons: [osd.0, mon.1] + wait-for-healthy: false + wait-for-osds-up: true + + :param ctx: Context + :param config: Configuration + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + with suppress_mon_health_to_clog(ctx, config): + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + if type_ == 'osd': + ctx.managers[cluster].mark_down_osd(id_) + ctx.daemons.get_daemon(type_, id_, cluster).restart() + clusters.add(cluster) + + if config.get('wait-for-healthy', True): + for cluster in clusters: + healthy(ctx=ctx, config=dict(cluster=cluster)) + if config.get('wait-for-osds-up', False): + for cluster in clusters: + ctx.managers[cluster].wait_for_all_osds_up() + if config.get('expected-failure') is not None: + log.info('Checking for expected-failure in osds logs after restart...') + expected_fail = config.get('expected-failure') + is_osd = teuthology.is_type('osd') + for role in daemons: + if not is_osd(role): + continue + (remote,) = ctx.cluster.only(role).remotes.keys() + cluster, type_, id_ = teuthology.split_role(role) + remote.run( + args = ['sudo', + 'egrep', expected_fail, + '/var/log/ceph/{cluster}-{type_}.{id_}.log'.format(cluster=cluster, type_=type_, id_=id_), + ]) + yield + + +@contextlib.contextmanager +def stop(ctx, config): + """ + Stop ceph daemons + + For example:: + tasks: + - ceph.stop: [mds.*] + + tasks: + - ceph.stop: [osd.0, osd.2] + + tasks: + - ceph.stop: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + clusters.add(cluster) + + + for cluster in clusters: + ctx.ceph[cluster].watchdog.stop() + ctx.ceph[cluster].watchdog.join() + + yield + + +@contextlib.contextmanager +def wait_for_failure(ctx, config): + """ + Wait for a failure of a ceph daemon + + For example:: + tasks: + - ceph.wait_for_failure: [mds.*] + + tasks: + - ceph.wait_for_failure: [osd.0, osd.2] + + tasks: + - ceph.wait_for_failure: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + try: + ctx.daemons.get_daemon(type_, id_, cluster).wait() + except: + log.info('Saw expected daemon failure. 
Continuing.') + pass + else: + raise RuntimeError('daemon %s did not fail' % role) + + yield + + +def validate_config(ctx, config): + """ + Perform some simple validation on task configuration. + Raises exceptions.ConfigError if an error is found. + """ + # check for osds from multiple clusters on the same host + for remote, roles_for_host in ctx.cluster.remotes.items(): + last_cluster = None + last_role = None + for role in roles_for_host: + role_cluster, role_type, _ = teuthology.split_role(role) + if role_type != 'osd': + continue + if last_cluster and last_cluster != role_cluster: + msg = "Host should not have osds (%s and %s) from multiple clusters" % ( + last_role, role) + raise exceptions.ConfigError(msg) + last_cluster = role_cluster + last_role = role + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up and tear down a Ceph cluster. + + For example:: + + tasks: + - ceph: + - interactive: + + You can also specify what branch to run:: + + tasks: + - ceph: + branch: foo + + Or a tag:: + + tasks: + - ceph: + tag: v0.42.13 + + Or a sha1:: + + tasks: + - ceph: + sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed + + Or a local source dir:: + + tasks: + - ceph: + path: /home/sage/ceph + + To capture code coverage data, use:: + + tasks: + - ceph: + coverage: true + + To use btrfs, ext4, or xfs on the target's scratch disks, use:: + + tasks: + - ceph: + fs: xfs + mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1] + mount_options: [nobarrier, inode64] + + To change the cephfs's default max_mds (1), use:: + + tasks: + - ceph: + cephfs: + max_mds: 2 + + To change the max_mds of a specific filesystem, use:: + + tasks: + - ceph: + cephfs: + max_mds: 2 + fs: + - name: a + max_mds: 3 + - name: b + + In the above example, filesystem 'a' will have 'max_mds' 3, + and filesystme 'b' will have 'max_mds' 2. + + To change the mdsmap's default session_timeout (60 seconds), use:: + + tasks: + - ceph: + cephfs: + session_timeout: 300 + + Note, this will cause the task to check the /scratch_devs file on each node + for available devices. If no such file is found, /dev/sdb will be used. + + To run some daemons under valgrind, include their names + and the tool/args to use in a valgrind section:: + + tasks: + - ceph: + valgrind: + mds.1: --tool=memcheck + osd.1: [--tool=memcheck, --leak-check=no] + + Those nodes which are using memcheck or valgrind will get + checked for bad results. + + To adjust or modify config options, use:: + + tasks: + - ceph: + conf: + section: + key: value + + For example:: + + tasks: + - ceph: + conf: + mds.0: + some option: value + other key: other value + client.0: + debug client: 10 + debug ms: 1 + + By default, the cluster log is checked for errors and warnings, + and the run marked failed if any appear. You can ignore log + entries by giving a list of egrep compatible regexes, i.e.: + + tasks: + - ceph: + log-ignorelist: ['foo.*bar', 'bad message'] + + To run multiple ceph clusters, use multiple ceph tasks, and roles + with a cluster name prefix, e.g. cluster1.client.0. Roles with no + cluster use the default cluster name, 'ceph'. OSDs from separate + clusters must be on separate hosts. Clients and non-osd daemons + from multiple clusters may be colocated. 
For each cluster, add an + instance of the ceph task with the cluster name specified, e.g.:: + + roles: + - [mon.a, osd.0, osd.1] + - [backup.mon.a, backup.osd.0, backup.osd.1] + - [client.0, backup.client.0] + tasks: + - ceph: + cluster: ceph + - ceph: + cluster: backup + + :param ctx: Context + :param config: Configuration + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + "task ceph only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + + first_ceph_cluster = False + if not hasattr(ctx, 'daemons'): + first_ceph_cluster = True + ctx.daemons = DaemonGroup() + + testdir = teuthology.get_testdir(ctx) + if config.get('coverage'): + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + log.info('Creating coverage directory...') + run.wait( + ctx.cluster.run( + args=[ + 'install', '-d', '-m0755', '--', + coverage_dir, + ], + wait=False, + ) + ) + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + validate_config(ctx, config) + + subtasks = [] + if first_ceph_cluster: + # these tasks handle general log setup and parsing on all hosts, + # so they should only be run once + subtasks = [ + lambda: ceph_log(ctx=ctx, config=None), + lambda: ceph_crash(ctx=ctx, config=None), + lambda: valgrind_post(ctx=ctx, config=config), + ] + + subtasks += [ + lambda: cluster(ctx=ctx, config=dict( + conf=config.get('conf', {}), + fs=config.get('fs', 'xfs'), + mkfs_options=config.get('mkfs_options', None), + mount_options=config.get('mount_options', None), + skip_mgr_daemons=config.get('skip_mgr_daemons', False), + log_ignorelist=config.get('log-ignorelist', []), + cpu_profile=set(config.get('cpu_profile', []),), + cluster=config['cluster'], + mon_bind_msgr2=config.get('mon_bind_msgr2', True), + mon_bind_addrvec=config.get('mon_bind_addrvec', True), + )), + lambda: run_daemon(ctx=ctx, config=config, type_='mon'), + lambda: run_daemon(ctx=ctx, config=config, type_='mgr'), + lambda: crush_setup(ctx=ctx, config=config), + lambda: check_enable_crimson(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='osd'), + lambda: setup_manager(ctx=ctx, config=config), + lambda: create_rbd_pool(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='mds'), + lambda: cephfs_setup(ctx=ctx, config=config), + lambda: watchdog_setup(ctx=ctx, config=config), + ] + + with contextutil.nested(*subtasks): + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=dict(cluster=config['cluster'])) + + yield + finally: + # set pg_num_targets back to actual pg_num, so we don't have to + # wait for pending merges (which can take a while!) + if not config.get('skip_stop_pg_num_changes', True): + ctx.managers[config['cluster']].stop_pg_num_changes() + + if config.get('wait-for-scrub', True): + # wait for pgs to become active+clean in case any + # recoveries were triggered since the last health check + ctx.managers[config['cluster']].wait_for_clean() + osd_scrub_pgs(ctx, config) + + # stop logging health to clog during shutdown, or else we generate + # a bunch of scary messages unrelated to our actual run. 
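+ # The command below runs with check_status=False, so a failure here (for
+ # example if the monitors are already gone) does not fail the teardown.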
+ firstmon = teuthology.get_first_mon(ctx, config, config['cluster']) + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + mon0_remote.run( + args=[ + 'sudo', + 'ceph', + '--cluster', config['cluster'], + 'config', 'set', 'global', + 'mon_health_to_clog', 'false', + ], + check_status=False, + ) diff --git a/qa/tasks/ceph_client.py b/qa/tasks/ceph_client.py new file mode 100644 index 000000000..74e818f93 --- /dev/null +++ b/qa/tasks/ceph_client.py @@ -0,0 +1,42 @@ +""" +Set up client keyring +""" +import logging + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def create_keyring(ctx, cluster_name): + """ + Set up key ring on remote sites + """ + log.info('Setting up client nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + testdir = teuthology.get_testdir(ctx) + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, name) + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" + '--name={name}'.format(name=name), + client_keyring, + run.Raw('&&'), + 'sudo', + 'chmod', + '0644', + client_keyring, + ], + ) diff --git a/qa/tasks/ceph_deploy.py b/qa/tasks/ceph_deploy.py new file mode 100644 index 000000000..99c8c1ffb --- /dev/null +++ b/qa/tasks/ceph_deploy.py @@ -0,0 +1,916 @@ +""" +Execute ceph-deploy as a task +""" + +import contextlib +import os +import time +import logging +import traceback + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.task import install as install_fn +from teuthology.orchestra import run +from tasks.cephfs.filesystem import Filesystem +from teuthology.misc import wait_until_healthy + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download_ceph_deploy(ctx, config): + """ + Downloads ceph-deploy from the ceph.com git mirror and (by default) + switches to the master branch. If the `ceph-deploy-branch` is specified, it + will use that instead. The `bootstrap` script is ran, with the argument + obtained from `python_version`, if specified. 
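+
+ A hypothetical task fragment that pins both settings::
+
+ tasks:
+ - ceph-deploy:
+ ceph-deploy-branch: master
+ python_version: 3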
+ """ + # use mon.a for ceph_admin + (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys() + + try: + py_ver = str(config['python_version']) + except KeyError: + pass + else: + supported_versions = ['2', '3'] + if py_ver not in supported_versions: + raise ValueError("python_version must be: {}, not {}".format( + ' or '.join(supported_versions), py_ver + )) + + log.info("Installing Python") + system_type = teuthology.get_system_type(ceph_admin) + + if system_type == 'rpm': + package = 'python36' if py_ver == '3' else 'python' + ctx.cluster.run(args=[ + 'sudo', 'yum', '-y', 'install', + package, 'python-virtualenv' + ]) + else: + package = 'python3' if py_ver == '3' else 'python' + ctx.cluster.run(args=[ + 'sudo', 'apt-get', '-y', '--force-yes', 'install', + package, 'python-virtualenv' + ]) + + log.info('Downloading ceph-deploy...') + testdir = teuthology.get_testdir(ctx) + ceph_deploy_branch = config.get('ceph-deploy-branch', 'master') + + ceph_admin.run( + args=[ + 'git', 'clone', '-b', ceph_deploy_branch, + teuth_config.ceph_git_base_url + 'ceph-deploy.git', + '{tdir}/ceph-deploy'.format(tdir=testdir), + ], + ) + args = [ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ] + try: + args.append(str(config['python_version'])) + except KeyError: + pass + ceph_admin.run(args=args) + + try: + yield + finally: + log.info('Removing ceph-deploy ...') + ceph_admin.run( + args=[ + 'rm', + '-rf', + '{tdir}/ceph-deploy'.format(tdir=testdir), + ], + ) + + +def is_healthy(ctx, config): + """Wait until a Ceph cluster is healthy.""" + testdir = teuthology.get_testdir(ctx) + ceph_admin = teuthology.get_first_mon(ctx, config) + (remote,) = ctx.cluster.only(ceph_admin).remotes.keys() + max_tries = 90 # 90 tries * 10 secs --> 15 minutes + tries = 0 + while True: + tries += 1 + if tries >= max_tries: + msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes" + remote.run( + args=[ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'ceph', + 'report', + ], + ) + raise RuntimeError(msg) + + out = remote.sh( + [ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'ceph', + 'health', + ], + logger=log.getChild('health'), + ) + log.info('Ceph health: %s', out.rstrip('\n')) + if out.split(None, 1)[0] == 'HEALTH_OK': + break + time.sleep(10) + + +def get_nodes_using_role(ctx, target_role): + """ + Extract the names of nodes that match a given role from a cluster, and modify the + cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy + uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23". + """ + + # Nodes containing a service of the specified role + nodes_of_interest = [] + + # Prepare a modified version of cluster.remotes with ceph-deploy-ized names + modified_remotes = {} + ceph_deploy_mapped = dict() + for _remote, roles_for_host in ctx.cluster.remotes.items(): + modified_remotes[_remote] = [] + for svc_id in roles_for_host: + if svc_id.startswith("{0}.".format(target_role)): + fqdn = str(_remote).split('@')[-1] + nodename = str(str(_remote).split('.')[0]).split('@')[1] + if target_role == 'mon': + nodes_of_interest.append(fqdn) + else: + nodes_of_interest.append(nodename) + mapped_role = "{0}.{1}".format(target_role, nodename) + modified_remotes[_remote].append(mapped_role) + # keep dict of mapped role for later use by tasks + # eg. 
mon.a => mon.node1 + ceph_deploy_mapped[svc_id] = mapped_role + else: + modified_remotes[_remote].append(svc_id) + + ctx.cluster.remotes = modified_remotes + # since the function is called multiple times for target roles + # append new mapped roles + if not hasattr(ctx.cluster, 'mapped_role'): + ctx.cluster.mapped_role = ceph_deploy_mapped + else: + ctx.cluster.mapped_role.update(ceph_deploy_mapped) + log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role)) + return nodes_of_interest + + +def get_dev_for_osd(ctx, config): + """Get a list of all osd device names.""" + osd_devs = [] + for remote, roles_for_host in ctx.cluster.remotes.items(): + host = remote.name.split('@')[-1] + shortname = host.split('.')[0] + devs = teuthology.get_scratch_devices(remote) + num_osd_per_host = list( + teuthology.roles_of_type( + roles_for_host, 'osd')) + num_osds = len(num_osd_per_host) + if config.get('separate_journal_disk') is not None: + num_devs_reqd = 2 * num_osds + assert num_devs_reqd <= len( + devs), 'fewer data and journal disks than required ' + shortname + for dindex in range(0, num_devs_reqd, 2): + jd_index = dindex + 1 + dev_short = devs[dindex].split('/')[-1] + jdev_short = devs[jd_index].split('/')[-1] + osd_devs.append((shortname, dev_short, jdev_short)) + else: + assert num_osds <= len(devs), 'fewer disks than osds ' + shortname + for dev in devs[:num_osds]: + dev_short = dev.split('/')[-1] + osd_devs.append((shortname, dev_short)) + return osd_devs + + +def get_all_nodes(ctx, config): + """Return a string of node names separated by blanks""" + nodelist = [] + for t, k in ctx.config['targets'].items(): + host = t.split('@')[-1] + simple_host = host.split('.')[0] + nodelist.append(simple_host) + nodelist = " ".join(nodelist) + return nodelist + +@contextlib.contextmanager +def build_ceph_cluster(ctx, config): + """Build a ceph cluster""" + + # Expect to find ceph_admin on the first mon by ID, same place that the download task + # puts it. Remember this here, because subsequently IDs will change from those in + # the test config to those that ceph-deploy invents. 
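+ # For illustration (hypothetical hostname): if the test config places mon.a
+ # on ubuntu@smithi001.front.sepia.ceph.com, get_nodes_using_role() rewrites
+ # that role to mon.smithi001 and records the mapping in
+ # ctx.cluster.mapped_role, e.g. {'mon.a': 'mon.smithi001'}.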
+ + (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys() + + def execute_ceph_deploy(cmd): + """Remotely execute a ceph_deploy command""" + return ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(cmd), + ], + check_status=False, + ).exitstatus + + def ceph_disk_osd_create(ctx, config): + node_dev_list = get_dev_for_osd(ctx, config) + no_of_osds = 0 + for d in node_dev_list: + node = d[0] + for disk in d[1:]: + zap = './ceph-deploy disk zap ' + node + ' ' + disk + estatus = execute_ceph_deploy(zap) + if estatus != 0: + raise RuntimeError("ceph-deploy: Failed to zap osds") + osd_create_cmd = './ceph-deploy osd create ' + # first check for filestore, default is bluestore with ceph-deploy + if config.get('filestore') is not None: + osd_create_cmd += '--filestore ' + elif config.get('bluestore') is not None: + osd_create_cmd += '--bluestore ' + if config.get('dmcrypt') is not None: + osd_create_cmd += '--dmcrypt ' + osd_create_cmd += ":".join(d) + estatus_osd = execute_ceph_deploy(osd_create_cmd) + if estatus_osd == 0: + log.info('successfully created osd') + no_of_osds += 1 + else: + raise RuntimeError("ceph-deploy: Failed to create osds") + return no_of_osds + + def ceph_volume_osd_create(ctx, config): + osds = ctx.cluster.only(teuthology.is_type('osd')) + no_of_osds = 0 + for remote in osds.remotes.keys(): + # all devs should be lvm + osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' ' + # default is bluestore so we just need config item for filestore + roles = ctx.cluster.remotes[remote] + dev_needed = len([role for role in roles + if role.startswith('osd')]) + all_devs = teuthology.get_scratch_devices(remote) + log.info("node={n}, need_devs={d}, available={a}".format( + n=remote.shortname, + d=dev_needed, + a=all_devs, + )) + devs = all_devs[0:dev_needed] + # rest of the devices can be used for journal if required + jdevs = dev_needed + for device in devs: + device_split = device.split('/') + lv_device = device_split[-2] + '/' + device_split[-1] + if config.get('filestore') is not None: + osd_create_cmd += '--filestore --data ' + lv_device + ' ' + # filestore with ceph-volume also needs journal disk + try: + jdevice = all_devs.pop(jdevs) + except IndexError: + raise RuntimeError("No device available for \ + journal configuration") + jdevice_split = jdevice.split('/') + j_lv = jdevice_split[-2] + '/' + jdevice_split[-1] + osd_create_cmd += '--journal ' + j_lv + else: + osd_create_cmd += ' --data ' + lv_device + estatus_osd = execute_ceph_deploy(osd_create_cmd) + if estatus_osd == 0: + log.info('successfully created osd') + no_of_osds += 1 + else: + raise RuntimeError("ceph-deploy: Failed to create osds") + return no_of_osds + + try: + log.info('Building ceph cluster using ceph-deploy...') + testdir = teuthology.get_testdir(ctx) + ceph_branch = None + if config.get('branch') is not None: + cbranch = config.get('branch') + for var, val in cbranch.items(): + ceph_branch = '--{var}={val}'.format(var=var, val=val) + all_nodes = get_all_nodes(ctx, config) + mds_nodes = get_nodes_using_role(ctx, 'mds') + mds_nodes = " ".join(mds_nodes) + mon_node = get_nodes_using_role(ctx, 'mon') + mon_nodes = " ".join(mon_node) + # skip mgr based on config item + # this is needed when test uses latest code to install old ceph + # versions + skip_mgr = config.get('skip-mgr', False) + if not skip_mgr: + mgr_nodes = get_nodes_using_role(ctx, 'mgr') + mgr_nodes = " ".join(mgr_nodes) + new_mon = './ceph-deploy new' + " " + mon_nodes 
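+ # Rough sketch of the command strings assembled here and just below, for a
+ # hypothetical two-host layout (mon entries come back from
+ # get_nodes_using_role() as FQDNs, the others as short names):
+ #
+ # new_mon -> './ceph-deploy new node1.example.com node2.example.com'
+ # gather_keys -> './ceph-deploy gatherkeys node1.example.com'
+ # mgr_create -> './ceph-deploy mgr create node1 node2'
+ # deploy_mds -> './ceph-deploy mds create node1'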
+ if not skip_mgr: + mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes + mon_hostname = mon_nodes.split(' ')[0] + mon_hostname = str(mon_hostname) + gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname + deploy_mds = './ceph-deploy mds create' + " " + mds_nodes + + if mon_nodes is None: + raise RuntimeError("no monitor nodes in the config file") + + estatus_new = execute_ceph_deploy(new_mon) + if estatus_new != 0: + raise RuntimeError("ceph-deploy: new command failed") + + log.info('adding config inputs...') + testdir = teuthology.get_testdir(ctx) + conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) + + if config.get('conf') is not None: + confp = config.get('conf') + for section, keys in confp.items(): + lines = '[{section}]\n'.format(section=section) + ceph_admin.sudo_write_file(conf_path, lines, append=True) + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + lines = '{key} = {value}\n'.format(key=key, value=value) + ceph_admin.sudo_write_file(conf_path, lines, append=True) + + # install ceph + dev_branch = ctx.config['branch'] + branch = '--dev={branch}'.format(branch=dev_branch) + if ceph_branch: + option = ceph_branch + else: + option = branch + install_nodes = './ceph-deploy install ' + option + " " + all_nodes + estatus_install = execute_ceph_deploy(install_nodes) + if estatus_install != 0: + raise RuntimeError("ceph-deploy: Failed to install ceph") + # install ceph-test package too + install_nodes2 = './ceph-deploy install --tests ' + option + \ + " " + all_nodes + estatus_install = execute_ceph_deploy(install_nodes2) + if estatus_install != 0: + raise RuntimeError("ceph-deploy: Failed to install ceph-test") + + mon_create_nodes = './ceph-deploy mon create-initial' + # If the following fails, it is OK, it might just be that the monitors + # are taking way more than a minute/monitor to form quorum, so lets + # try the next block which will wait up to 15 minutes to gatherkeys. 
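+ # execute_ceph_deploy() runs with check_status=False and returns the exit
+ # status, so the call below deliberately ignores a failure; the usual
+ # pattern elsewhere in this function is (hypothetical command shown):
+ #
+ # if execute_ceph_deploy('./ceph-deploy gatherkeys node1') != 0:
+ # raise RuntimeError("ceph-deploy: Failed during gather keys")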
+ execute_ceph_deploy(mon_create_nodes) + + estatus_gather = execute_ceph_deploy(gather_keys) + if estatus_gather != 0: + raise RuntimeError("ceph-deploy: Failed during gather keys") + + # install admin key on mons (ceph-create-keys doesn't do this any more) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote in mons.remotes.keys(): + execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname) + + # create osd's + if config.get('use-ceph-volume', False): + no_of_osds = ceph_volume_osd_create(ctx, config) + else: + # this method will only work with ceph-deploy v1.5.39 or older + no_of_osds = ceph_disk_osd_create(ctx, config) + + if not skip_mgr: + execute_ceph_deploy(mgr_create) + + if mds_nodes: + estatus_mds = execute_ceph_deploy(deploy_mds) + if estatus_mds != 0: + raise RuntimeError("ceph-deploy: Failed to deploy mds") + + if config.get('test_mon_destroy') is not None: + for d in range(1, len(mon_node)): + mon_destroy_nodes = './ceph-deploy mon destroy' + \ + " " + mon_node[d] + estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes) + if estatus_mon_d != 0: + raise RuntimeError("ceph-deploy: Failed to delete monitor") + + + + if config.get('wait-for-healthy', True) and no_of_osds >= 2: + is_healthy(ctx=ctx, config=None) + + log.info('Setting up client nodes...') + conf_path = '/etc/ceph/ceph.conf' + admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' + first_mon = teuthology.get_first_mon(ctx, config) + (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() + conf_data = mon0_remote.read_file(conf_path, sudo=True) + admin_keyring = mon0_remote.read_file(admin_keyring_path, sudo=True) + + clients = ctx.cluster.only(teuthology.is_type('client')) + for remote, roles_for_host in clients.remotes.items(): + for id_ in teuthology.roles_of_type(roles_for_host, 'client'): + client_keyring = \ + '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) + mon0_remote.run( + args=[ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'bash', '-c', + run.Raw('"'), 'ceph', + 'auth', + 'get-or-create', + 'client.{id}'.format(id=id_), + 'mds', 'allow', + 'mon', 'allow *', + 'osd', 'allow *', + run.Raw('>'), + client_keyring, + run.Raw('"'), + ], + ) + key_data = mon0_remote.read_file( + path=client_keyring, + sudo=True, + ) + remote.sudo_write_file( + path=client_keyring, + data=key_data, + mode='0644' + ) + remote.sudo_write_file( + path=admin_keyring_path, + data=admin_keyring, + mode='0644' + ) + remote.sudo_write_file( + path=conf_path, + data=conf_data, + mode='0644' + ) + + if mds_nodes: + log.info('Configuring CephFS...') + Filesystem(ctx, create=True) + elif not config.get('only_mon'): + raise RuntimeError( + "The cluster is NOT operational due to insufficient OSDs") + # create rbd pool + ceph_admin.run( + args=[ + 'sudo', 'ceph', '--cluster', 'ceph', + 'osd', 'pool', 'create', 'rbd', '128', '128'], + check_status=False) + ceph_admin.run( + args=[ + 'sudo', 'ceph', '--cluster', 'ceph', + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ], + check_status=False) + yield + + except Exception: + log.info( + "Error encountered, logging exception before tearing down ceph-deploy") + log.info(traceback.format_exc()) + raise + finally: + if config.get('keep_running'): + return + log.info('Stopping ceph...') + ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'], + check_status=False) + time.sleep(4) + + # and now just check for the processes themselves, as if upstart/sysvinit + # is lying to us. 
Ignore errors if the grep fails + ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'), + 'grep', '-v', 'grep', run.Raw('|'), + 'grep', 'ceph'], check_status=False) + ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'), + 'grep', 'ceph'], check_status=False) + + if ctx.archive is not None: + # archive mon data, too + log.info('Archiving mon data...') + path = os.path.join(ctx.archive, 'data') + os.makedirs(path) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote, roles in mons.remotes.items(): + for role in roles: + if role.startswith('mon.'): + teuthology.pull_directory_tarball( + remote, + '/var/lib/ceph/mon', + path + '/' + role + '.tgz') + + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + os.makedirs(path) + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + os.makedirs(sub) + teuthology.pull_directory(remote, '/var/log/ceph', + os.path.join(sub, 'log')) + + # Prevent these from being undefined if the try block fails + all_nodes = get_all_nodes(ctx, config) + purge_nodes = './ceph-deploy purge' + " " + all_nodes + purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes + + log.info('Purging package...') + execute_ceph_deploy(purge_nodes) + log.info('Purging data...') + execute_ceph_deploy(purgedata_nodes) + + +@contextlib.contextmanager +def cli_test(ctx, config): + """ + ceph-deploy cli to exercise most commonly use cli's and ensure + all commands works and also startup the init system. 
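+
+ This is normally driven through the ceph-deploy.single_node_test task
+ below rather than run on its own; it expects a single admin node and at
+ least three scratch devices on the osd host.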
+ + """ + log.info('Ceph-deploy Test') + if config is None: + config = {} + test_branch = '' + conf_dir = teuthology.get_testdir(ctx) + "/cdtest" + + def execute_cdeploy(admin, cmd, path): + """Execute ceph-deploy commands """ + """Either use git path or repo path """ + args = ['cd', conf_dir, run.Raw(';')] + if path: + args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path)) + else: + args.append('ceph-deploy') + args.append(run.Raw(cmd)) + ec = admin.run(args=args, check_status=False).exitstatus + if ec != 0: + raise RuntimeError( + "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec)) + + if config.get('rhbuild'): + path = None + else: + path = teuthology.get_testdir(ctx) + # test on branch from config eg: wip-* , master or next etc + # packages for all distro's should exist for wip* + if ctx.config.get('branch'): + branch = ctx.config.get('branch') + test_branch = ' --dev={branch} '.format(branch=branch) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for node, role in mons.remotes.items(): + admin = node + admin.run(args=['mkdir', conf_dir], check_status=False) + nodename = admin.shortname + system_type = teuthology.get_system_type(admin) + if config.get('rhbuild'): + admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y']) + log.info('system type is %s', system_type) + osds = ctx.cluster.only(teuthology.is_type('osd')) + + for remote, roles in osds.remotes.items(): + devs = teuthology.get_scratch_devices(remote) + log.info("roles %s", roles) + if (len(devs) < 3): + log.error( + 'Test needs minimum of 3 devices, only found %s', + str(devs)) + raise RuntimeError("Needs minimum of 3 devices ") + + conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir) + new_cmd = 'new ' + nodename + execute_cdeploy(admin, new_cmd, path) + if config.get('conf') is not None: + confp = config.get('conf') + for section, keys in confp.items(): + lines = '[{section}]\n'.format(section=section) + admin.sudo_write_file(conf_path, lines, append=True) + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + lines = '{key} = {value}\n'.format(key=key, value=value) + admin.sudo_write_file(conf_path, lines, append=True) + new_mon_install = 'install {branch} --mon '.format( + branch=test_branch) + nodename + new_mgr_install = 'install {branch} --mgr '.format( + branch=test_branch) + nodename + new_osd_install = 'install {branch} --osd '.format( + branch=test_branch) + nodename + new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename + create_initial = 'mon create-initial ' + mgr_create = 'mgr create ' + nodename + # either use create-keys or push command + push_keys = 'admin ' + nodename + execute_cdeploy(admin, new_mon_install, path) + execute_cdeploy(admin, new_mgr_install, path) + execute_cdeploy(admin, new_osd_install, path) + execute_cdeploy(admin, new_admin, path) + execute_cdeploy(admin, create_initial, path) + execute_cdeploy(admin, mgr_create, path) + execute_cdeploy(admin, push_keys, path) + + for i in range(3): + zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i]) + prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i]) + execute_cdeploy(admin, zap_disk, path) + execute_cdeploy(admin, prepare, path) + + log.info("list files for debugging purpose to check file permissions") + admin.run(args=['ls', run.Raw('-lt'), conf_dir]) + remote.run(args=['sudo', 'ceph', '-s'], check_status=False) + out = remote.sh('sudo ceph health') + log.info('Ceph health: %s', out.rstrip('\n')) + 
log.info("Waiting for cluster to become healthy") + with contextutil.safe_while(sleep=10, tries=6, + action='check health') as proceed: + while proceed(): + out = remote.sh('sudo ceph health') + if (out.split(None, 1)[0] == 'HEALTH_OK'): + break + rgw_install = 'install {branch} --rgw {node}'.format( + branch=test_branch, + node=nodename, + ) + rgw_create = 'rgw create ' + nodename + execute_cdeploy(admin, rgw_install, path) + execute_cdeploy(admin, rgw_create, path) + log.info('All ceph-deploy cli tests passed') + try: + yield + finally: + log.info("cleaning up") + ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'], + check_status=False) + time.sleep(4) + for i in range(3): + umount_dev = "{d}1".format(d=devs[i]) + remote.run(args=['sudo', 'umount', run.Raw(umount_dev)]) + cmd = 'purge ' + nodename + execute_cdeploy(admin, cmd, path) + cmd = 'purgedata ' + nodename + execute_cdeploy(admin, cmd, path) + log.info("Removing temporary dir") + admin.run( + args=[ + 'rm', + run.Raw('-rf'), + run.Raw(conf_dir)], + check_status=False) + if config.get('rhbuild'): + admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y']) + + +@contextlib.contextmanager +def single_node_test(ctx, config): + """ + - ceph-deploy.single_node_test: null + + #rhbuild testing + - ceph-deploy.single_node_test: + rhbuild: 1.2.3 + + """ + log.info("Testing ceph-deploy on single node") + if config is None: + config = {} + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) + + if config.get('rhbuild'): + log.info("RH Build, Skip Download") + with contextutil.nested( + lambda: cli_test(ctx=ctx, config=config), + ): + yield + else: + with contextutil.nested( + lambda: install_fn.ship_utilities(ctx=ctx, config=None), + lambda: download_ceph_deploy(ctx=ctx, config=config), + lambda: cli_test(ctx=ctx, config=config), + ): + yield + + +@contextlib.contextmanager +def upgrade(ctx, config): + """ + Upgrade using ceph-deploy + eg: + ceph-deploy.upgrade: + # to upgrade to specific branch, use + branch: + stable: jewel + # to setup mgr node, use + setup-mgr-node: True + # to wait for cluster to be healthy after all upgrade, use + wait-for-healthy: True + role: (upgrades the below roles serially) + mon.a + mon.b + osd.0 + """ + roles = config.get('roles') + # get the roles that are mapped as per ceph-deploy + # roles are mapped for mon/mds eg: mon.a => mon.host_short_name + mapped_role = ctx.cluster.mapped_role + log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role)) + if config.get('branch'): + branch = config.get('branch') + (var, val) = branch.items()[0] + ceph_branch = '--{var}={val}'.format(var=var, val=val) + else: + # default to wip-branch under test + dev_branch = ctx.config['branch'] + ceph_branch = '--dev={branch}'.format(branch=dev_branch) + # get the node used for initial deployment which is mon.a + mon_a = mapped_role.get('mon.a') + (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys() + testdir = teuthology.get_testdir(ctx) + cmd = './ceph-deploy install ' + ceph_branch + for role in roles: + # check if this role is mapped (mon or mds) + if mapped_role.get(role): + role = mapped_role.get(role) + remotes_and_roles = ctx.cluster.only(role).remotes + for remote, roles in remotes_and_roles.items(): + nodename = remote.shortname + cmd = cmd + ' ' + nodename + log.info("Upgrading ceph on %s", nodename) + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(cmd), + ], + ) + # 
restart all ceph services, ideally upgrade should but it does not + remote.run( + args=[ + 'sudo', 'systemctl', 'restart', 'ceph.target' + ] + ) + ceph_admin.run(args=['sudo', 'ceph', '-s']) + + # workaround for http://tracker.ceph.com/issues/20950 + # write the correct mgr key to disk + if config.get('setup-mgr-node', None): + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote, roles in mons.remotes.items(): + remote.run( + args=[ + run.Raw('sudo ceph auth get client.bootstrap-mgr'), + run.Raw('|'), + run.Raw('sudo tee'), + run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring') + ] + ) + + if config.get('setup-mgr-node', None): + mgr_nodes = get_nodes_using_role(ctx, 'mgr') + mgr_nodes = " ".join(mgr_nodes) + mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes + mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes + # install mgr + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(mgr_install), + ], + ) + # create mgr + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(mgr_create), + ], + ) + ceph_admin.run(args=['sudo', 'ceph', '-s']) + if config.get('wait-for-healthy', None): + wait_until_healthy(ctx, ceph_admin, use_sudo=True) + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up and tear down a Ceph cluster. + + For example:: + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + stable: bobtail + mon_initial_members: 1 + ceph-deploy-branch: my-ceph-deploy-branch + only_mon: true + keep_running: true + # either choose bluestore or filestore, default is bluestore + bluestore: True + # or + filestore: True + # skip install of mgr for old release using below flag + skip-mgr: True ( default is False ) + # to use ceph-volume instead of ceph-disk + # ceph-disk can only be used with old ceph-deploy release from pypi + use-ceph-volume: true + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + dev: master + conf: + mon: + debug mon = 20 + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + testing: + dmcrypt: yes + separate_journal_disk: yes + + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task ceph-deploy only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) + + if config.get('branch') is not None: + assert isinstance( + config['branch'], dict), 'branch must be a dictionary' + + log.info('task ceph-deploy with config ' + str(config)) + + # we need to use 1.5.39-stable for testing jewel or master branch with + # ceph-disk + if config.get('use-ceph-volume', False) is False: + # check we are not testing specific branch + if config.get('ceph-deploy-branch', False) is False: + config['ceph-deploy-branch'] = '1.5.39-stable' + + with contextutil.nested( + lambda: install_fn.ship_utilities(ctx=ctx, config=None), + lambda: download_ceph_deploy(ctx=ctx, config=config), + lambda: build_ceph_cluster(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/ceph_fuse.py b/qa/tasks/ceph_fuse.py new file mode 100644 index 000000000..70cf9bf83 --- /dev/null +++ b/qa/tasks/ceph_fuse.py @@ -0,0 +1,185 @@ +""" +Ceph FUSE client task +""" + +import contextlib +import logging + +from teuthology import misc +from tasks.cephfs.fuse_mount import FuseMount + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager 
+def task(ctx, config): + """ + Mount/unmount a ``ceph-fuse`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. + + ``brxnet`` should be a Private IPv4 Address range, default range is + [192.168.0.0/16] + + Example that mounts all clients:: + + tasks: + - ceph: + - ceph-fuse: + - interactive: + - brxnet: [192.168.0.0/16] + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + Example that enables valgrind: + + tasks: + - ceph: + - ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + - interactive: + + Example that stops an already-mounted client: + + :: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - ... do something that requires the FS mounted ... + - ceph-fuse: + client.0: + mounted: false + - ... do something that requires the FS unmounted ... + + Example that adds more generous wait time for mount (for virtual machines): + + tasks: + - ceph: + - ceph-fuse: + client.0: + mount_wait: 60 # default is 0, do not wait before checking /sys/ + mount_timeout: 120 # default is 30, give up if /sys/ is not populated + - interactive: + + Example that creates and mounts a subvol: + + overrides: + ceph: + subvols: + create: 2 + subvol_options: "--namespace-isolated --size 25000000000" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.1: + mount_subvol_num: 1 + + :param ctx: Context + :param config: Configuration + """ + log.info('Running ceph_fuse task...') + + if config is None: + ids = misc.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = filter(lambda x: 'client.' in x, config.keys()) + else: + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") + + clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) + testdir = misc.get_testdir(ctx) + all_mounts = getattr(ctx, 'mounts', {}) + mounted_by_me = {} + skipped = {} + remotes = set() + + brxnet = config.get("brxnet", None) + + # Construct any new FuseMount instances + overrides = ctx.config.get('overrides', {}).get('ceph-fuse', {}) + top_overrides = dict(filter(lambda x: 'client.' not in x[0], overrides.items())) + for id_, remote in clients: + entity = f"client.{id_}" + client_config = config.get(entity) + if client_config is None: + client_config = {} + # top level overrides + misc.deep_merge(client_config, top_overrides) + # mount specific overrides + client_config_overrides = overrides.get(entity) + misc.deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + + remotes.add(remote) + auth_id = client_config.get("auth_id", id_) + cephfs_name = client_config.get("cephfs_name") + + skip = client_config.get("skip", False) + if skip: + skipped[id_] = skip + continue + + if id_ not in all_mounts: + fuse_mount = FuseMount(ctx=ctx, client_config=client_config, + test_dir=testdir, client_id=auth_id, + client_remote=remote, brxnet=brxnet, + cephfs_name=cephfs_name) + all_mounts[id_] = fuse_mount + else: + # Catch bad configs where someone has e.g. 
tried to use ceph-fuse and kcephfs for the same client + assert isinstance(all_mounts[id_], FuseMount) + + if not config.get("disabled", False) and client_config.get('mounted', True): + mounted_by_me[id_] = {"config": client_config, "mount": all_mounts[id_]} + + ctx.mounts = all_mounts + + # Umount any pre-existing clients that we have not been asked to mount + for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()): + mount = all_mounts[client_id] + if mount.is_mounted(): + mount.umount_wait() + + for remote in remotes: + FuseMount.cleanup_stale_netnses_and_bridge(remote) + + # Mount any clients we have been asked to (default to mount all) + log.info('Mounting ceph-fuse clients...') + for info in mounted_by_me.values(): + config = info["config"] + mount_x = info['mount'] + mount_x.mount(mntopts=config.get('mntopts', []), mntargs=config.get('mntargs', [])) + + for info in mounted_by_me.values(): + info["mount"].wait_until_mounted() + + try: + yield all_mounts + finally: + log.info('Unmounting ceph-fuse clients...') + + for info in mounted_by_me.values(): + # Conditional because an inner context might have umounted it + mount = info["mount"] + if mount.is_mounted(): + mount.umount_wait() + for remote in remotes: + FuseMount.cleanup_stale_netnses_and_bridge(remote) diff --git a/qa/tasks/ceph_iscsi_client.py b/qa/tasks/ceph_iscsi_client.py new file mode 100644 index 000000000..189b7fa31 --- /dev/null +++ b/qa/tasks/ceph_iscsi_client.py @@ -0,0 +1,56 @@ +""" +Set up ceph-iscsi client. +""" +import logging +import contextlib +from textwrap import dedent + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up ceph-iscsi client. + + tasks: + ceph_iscsi_client: + clients: [client.1] + """ + log.info('Setting up ceph-iscsi client...') + for role in config['clients']: + (remote,) = (ctx.cluster.only(role).remotes.keys()) + + conf = dedent(''' + InitiatorName=iqn.1994-05.com.redhat:client + ''') + path = "/etc/iscsi/initiatorname.iscsi" + remote.sudo_write_file(path, conf, mkdir=True) + + # the restart is needed after the above change is applied + remote.run(args=['sudo', 'systemctl', 'restart', 'iscsid']) + + remote.run(args=['sudo', 'modprobe', 'dm_multipath']) + remote.run(args=['sudo', 'mpathconf', '--enable']) + conf = dedent(''' + devices { + device { + vendor "LIO-ORG" + product "TCMU device" + hardware_handler "1 alua" + path_grouping_policy "failover" + path_selector "queue-length 0" + failback 60 + path_checker tur + prio alua + prio_args exclusive_pref_bit + fast_io_fail_tmo 25 + no_path_retry queue + } + } + ''') + path = "/etc/multipath.conf" + remote.sudo_write_file(path, conf, append=True) + remote.run(args=['sudo', 'systemctl', 'start', 'multipathd']) + + yield diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py new file mode 100644 index 000000000..516c409e8 --- /dev/null +++ b/qa/tasks/ceph_manager.py @@ -0,0 +1,3235 @@ +""" +ceph manager -- Thrasher and CephManager objects +""" +from functools import wraps +import contextlib +import errno +import random +import signal +import time +import gevent +import base64 +import json +import logging +import threading +import traceback +import os +import shlex + +from io import BytesIO, StringIO +from subprocess import DEVNULL +from teuthology import misc as teuthology +from tasks.scrub import Scrubber +from tasks.util.rados import cmd_erasure_code_profile +from tasks.util import get_remote + +from teuthology.contextutil import safe_while +from 
teuthology.orchestra.remote import Remote +from teuthology.orchestra import run +from teuthology.parallel import parallel +from teuthology.exceptions import CommandFailedError +from tasks.thrasher import Thrasher + + +DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf' + +log = logging.getLogger(__name__) + +# this is for cephadm clusters +def shell(ctx, cluster_name, remote, args, name=None, **kwargs): + extra_args = [] + if name: + extra_args = ['-n', name] + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + ] + extra_args + [ + '--fsid', ctx.ceph[cluster_name].fsid, + '--', + ] + args, + **kwargs + ) + +# this is for rook clusters +def toolbox(ctx, cluster_name, args, **kwargs): + return ctx.rook[cluster_name].remote.run( + args=[ + 'kubectl', + '-n', 'rook-ceph', + 'exec', + ctx.rook[cluster_name].toolbox, + '--', + ] + args, + **kwargs + ) + + +def write_conf(ctx, conf_path=DEFAULT_CONF_PATH, cluster='ceph'): + conf_fp = BytesIO() + ctx.ceph[cluster].conf.write(conf_fp) + conf_fp.seek(0) + writes = ctx.cluster.run( + args=[ + 'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'), + 'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'), + 'sudo', 'tee', conf_path, run.Raw('&&'), + 'sudo', 'chmod', '0644', conf_path, + run.Raw('>'), '/dev/null', + + ], + stdin=run.PIPE, + wait=False) + teuthology.feed_many_stdins_and_close(conf_fp, writes) + run.wait(writes) + +def get_valgrind_args(testdir, name, preamble, v, exit_on_first_error=True, cd=True): + """ + Build a command line for running valgrind. + + testdir - test results directory + name - name of daemon (for naming hte log file) + preamble - stuff we should run before valgrind + v - valgrind arguments + """ + if v is None: + return preamble + if not isinstance(v, list): + v = [v] + + # https://tracker.ceph.com/issues/44362 + preamble.extend([ + 'env', 'OPENSSL_ia32cap=~0x1000000000000000', + ]) + + val_path = '/var/log/ceph/valgrind' + if '--tool=memcheck' in v or '--tool=helgrind' in v: + extra_args = [ + 'valgrind', + '--trace-children=no', + '--child-silent-after-fork=yes', + '--soname-synonyms=somalloc=*tcmalloc*', + '--num-callers=50', + '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir), + '--xml=yes', + '--xml-file={vdir}/{n}.log'.format(vdir=val_path, n=name), + '--time-stamp=yes', + '--vgdb=yes', + ] + else: + extra_args = [ + 'valgrind', + '--trace-children=no', + '--child-silent-after-fork=yes', + '--soname-synonyms=somalloc=*tcmalloc*', + '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir), + '--log-file={vdir}/{n}.log'.format(vdir=val_path, n=name), + '--time-stamp=yes', + '--vgdb=yes', + ] + if exit_on_first_error: + extra_args.extend([ + # at least Valgrind 3.14 is required + '--exit-on-first-error=yes', + '--error-exitcode=42', + ]) + args = [] + if cd: + args += ['cd', testdir, run.Raw('&&')] + args += preamble + extra_args + v + log.debug('running %s under valgrind with args %s', name, args) + return args + + +def mount_osd_data(ctx, remote, cluster, osd): + """ + Mount a remote OSD + + :param ctx: Context + :param remote: Remote site + :param cluster: name of ceph cluster + :param osd: Osd name + """ + log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote)) + role = "{0}.osd.{1}".format(cluster, osd) + alt_role = role if cluster != 'ceph' else "osd.{0}".format(osd) + if remote in ctx.disk_config.remote_to_roles_to_dev: + if alt_role in ctx.disk_config.remote_to_roles_to_dev[remote]: + role = alt_role + if role not in 
ctx.disk_config.remote_to_roles_to_dev[remote]: + return + dev = ctx.disk_config.remote_to_roles_to_dev[remote][role] + mount_options = ctx.disk_config.\ + remote_to_roles_to_dev_mount_options[remote][role] + fstype = ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] + mnt = os.path.join('/var/lib/ceph/osd', '{0}-{1}'.format(cluster, osd)) + + log.info('Mounting osd.{o}: dev: {n}, cluster: {c}' + 'mountpoint: {p}, type: {t}, options: {v}'.format( + o=osd, n=remote.name, p=mnt, t=fstype, v=mount_options, + c=cluster)) + + remote.run( + args=[ + 'sudo', + 'mount', + '-t', fstype, + '-o', ','.join(mount_options), + dev, + mnt, + ] + ) + + +def log_exc(func): + @wraps(func) + def wrapper(self): + try: + return func(self) + except: + self.log(traceback.format_exc()) + raise + return wrapper + + +class PoolType: + REPLICATED = 1 + ERASURE_CODED = 3 + + +class OSDThrasher(Thrasher): + """ + Object used to thrash Ceph + """ + def __init__(self, manager, config, name, logger): + super(OSDThrasher, self).__init__() + + self.ceph_manager = manager + self.cluster = manager.cluster + self.ceph_manager.wait_for_clean() + osd_status = self.ceph_manager.get_osd_status() + self.in_osds = osd_status['in'] + self.live_osds = osd_status['live'] + self.out_osds = osd_status['out'] + self.dead_osds = osd_status['dead'] + self.stopping = False + self.logger = logger + self.config = config + self.name = name + self.revive_timeout = self.config.get("revive_timeout", 360) + self.pools_to_fix_pgp_num = set() + if self.config.get('powercycle'): + self.revive_timeout += 120 + self.clean_wait = self.config.get('clean_wait', 0) + self.minin = self.config.get("min_in", 4) + self.chance_move_pg = self.config.get('chance_move_pg', 1.0) + self.sighup_delay = self.config.get('sighup_delay') + self.optrack_toggle_delay = self.config.get('optrack_toggle_delay') + self.dump_ops_enable = self.config.get('dump_ops_enable') + self.noscrub_toggle_delay = self.config.get('noscrub_toggle_delay') + self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05) + self.chance_thrash_pg_upmap = self.config.get('chance_thrash_pg_upmap', 1.0) + self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0) + self.random_eio = self.config.get('random_eio') + self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3) + + num_osds = self.in_osds + self.out_osds + self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds) + self.min_pgs = self.config.get("min_pgs_per_pool_osd", 1) * len(num_osds) + if self.config is None: + self.config = dict() + # prevent monitor from auto-marking things out while thrasher runs + # try both old and new tell syntax, in case we are testing old code + self.saved_options = [] + # assuming that the default settings do not vary from one daemon to + # another + first_mon = teuthology.get_first_mon(manager.ctx, self.config).split('.') + opts = [('mon', 'mon_osd_down_out_interval', 0)] + #why do we disable marking an OSD out automatically? 
:/ + for service, opt, new_value in opts: + old_value = manager.get_config(first_mon[0], + first_mon[1], + opt) + self.saved_options.append((service, opt, old_value)) + manager.inject_args(service, '*', opt, new_value) + # initialize ceph_objectstore_tool property - must be done before + # do_thrash is spawned - http://tracker.ceph.com/issues/18799 + if (self.config.get('powercycle') or + not self.cmd_exists_on_osds("ceph-objectstore-tool") or + self.config.get('disable_objectstore_tool_tests', False)): + self.ceph_objectstore_tool = False + if self.config.get('powercycle'): + self.log("Unable to test ceph-objectstore-tool, " + "powercycle testing") + else: + self.log("Unable to test ceph-objectstore-tool, " + "not available on all OSD nodes") + else: + self.ceph_objectstore_tool = \ + self.config.get('ceph_objectstore_tool', True) + # spawn do_thrash + self.thread = gevent.spawn(self.do_thrash) + if self.sighup_delay: + self.sighup_thread = gevent.spawn(self.do_sighup) + if self.optrack_toggle_delay: + self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle) + if self.dump_ops_enable == "true": + self.dump_ops_thread = gevent.spawn(self.do_dump_ops) + if self.noscrub_toggle_delay: + self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle) + + def log(self, msg, *args, **kwargs): + self.logger.info(msg, *args, **kwargs) + + def cmd_exists_on_osds(self, cmd): + if self.ceph_manager.cephadm or self.ceph_manager.rook: + return True + allremotes = self.ceph_manager.ctx.cluster.only(\ + teuthology.is_type('osd', self.cluster)).remotes.keys() + allremotes = list(set(allremotes)) + for remote in allremotes: + proc = remote.run(args=['type', cmd], wait=True, + check_status=False, stdout=BytesIO(), + stderr=BytesIO()) + if proc.exitstatus != 0: + return False; + return True; + + def run_ceph_objectstore_tool(self, remote, osd, cmd): + if self.ceph_manager.cephadm: + return shell( + self.ceph_manager.ctx, self.ceph_manager.cluster, remote, + args=['ceph-objectstore-tool', '--err-to-stderr'] + cmd, + name=osd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + elif self.ceph_manager.rook: + assert False, 'not implemented' + else: + return remote.run( + args=['sudo', 'adjust-ulimits', 'ceph-objectstore-tool', '--err-to-stderr'] + cmd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + + def run_ceph_bluestore_tool(self, remote, osd, cmd): + if self.ceph_manager.cephadm: + return shell( + self.ceph_manager.ctx, self.ceph_manager.cluster, remote, + args=['ceph-bluestore-tool', '--err-to-stderr'] + cmd, + name=osd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + elif self.ceph_manager.rook: + assert False, 'not implemented' + else: + return remote.run( + args=['sudo', 'ceph-bluestore-tool', '--err-to-stderr'] + cmd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + + def kill_osd(self, osd=None, mark_down=False, mark_out=False): + """ + :param osd: Osd to be killed. + :mark_down: Mark down if true. + :mark_out: Mark out if true. 
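The OSDThrasher constructor above pulls its thrashing knobs straight from the task config via config.get() with defaults. A rough, illustrative sketch of the kind of dictionary it expects follows; the key names are taken from the constructor, but the values here are made up for illustration, not recommended settings.

# Illustrative only: keys mirror the config.get() calls in OSDThrasher.__init__;
# the values are example numbers, not suite defaults.
thrasher_config = {
    'revive_timeout': 360,            # seconds to wait for a revived OSD
    'min_in': 4,                      # never thrash below this many "in" OSDs
    'chance_down': 0.4,               # weight for kill_osd in choose_action()
    'chance_move_pg': 1.0,            # move a PG with ceph-objectstore-tool
    'chance_thrash_cluster_full': 0.05,
    'chance_thrash_pg_upmap': 1.0,
    'chance_force_recovery': 0.3,
    'sighup_delay': 0.1,              # enables the do_sighup greenlet
    'noscrub_toggle_delay': 2.0,      # enables the do_noscrub_toggle greenlet
}

# The constructor pattern is simply config.get() with a default, e.g.:
revive_timeout = thrasher_config.get('revive_timeout', 360)
if thrasher_config.get('powercycle'):
    revive_timeout += 120             # powercycled OSDs get extra time
print(revive_timeout)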
+ """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Killing osd %s, live_osds are %s" % (str(osd), + str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.kill_osd(osd) + if mark_down: + self.ceph_manager.mark_down_osd(osd) + if mark_out and osd in self.in_osds: + self.out_osd(osd) + if self.ceph_objectstore_tool: + self.log("Testing ceph-objectstore-tool on down osd.%s" % osd) + remote = self.ceph_manager.find_remote('osd', osd) + FSPATH = self.ceph_manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + exp_osd = imp_osd = osd + self.log('remote for osd %s is %s' % (osd, remote)) + exp_remote = imp_remote = remote + # If an older osd is available we'll move a pg from there + if (len(self.dead_osds) > 1 and + random.random() < self.chance_move_pg): + exp_osd = random.choice(self.dead_osds[:-1]) + exp_remote = self.ceph_manager.find_remote('osd', exp_osd) + self.log('remote for exp osd %s is %s' % (exp_osd, exp_remote)) + prefix = [ + '--no-mon-config', + '--log-file=/var/log/ceph/objectstore_tool.$pid.log', + ] + + if self.ceph_manager.rook: + assert False, 'not implemented' + + if not self.ceph_manager.cephadm: + # ceph-objectstore-tool might be temporarily absent during an + # upgrade - see http://tracker.ceph.com/issues/18014 + with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed: + while proceed(): + proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'], + wait=True, check_status=False, stdout=BytesIO(), + stderr=BytesIO()) + if proc.exitstatus == 0: + break + log.debug("ceph-objectstore-tool binary not present, trying again") + + # ceph-objectstore-tool might bogusly fail with "OSD has the store locked" + # see http://tracker.ceph.com/issues/19556 + with safe_while(sleep=15, tries=40, action="ceph-objectstore-tool --op list-pgs") as proceed: + while proceed(): + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--op', 'list-pgs', + ]) + if proc.exitstatus == 0: + break + elif (proc.exitstatus == 1 and + proc.stderr.getvalue() == "OSD has the store locked"): + continue + else: + raise Exception("ceph-objectstore-tool: " + "exp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + + pgs = proc.stdout.getvalue().split('\n')[:-1] + if len(pgs) == 0: + self.log("No PGs found for osd.{osd}".format(osd=exp_osd)) + return + pg = random.choice(pgs) + #exp_path = teuthology.get_testdir(self.ceph_manager.ctx) + #exp_path = os.path.join(exp_path, '{0}.data'.format(self.cluster)) + exp_path = os.path.join('/var/log/ceph', # available inside 'shell' container + "exp.{pg}.{id}".format( + pg=pg, + id=exp_osd)) + if self.ceph_manager.cephadm: + exp_host_path = os.path.join( + '/var/log/ceph', + self.ceph_manager.ctx.ceph[self.ceph_manager.cluster].fsid, + "exp.{pg}.{id}".format( + pg=pg, + id=exp_osd)) + else: + exp_host_path = exp_path + + # export + # Can't use new export-remove op since this is part of upgrade testing + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--op', 'export', + '--pgid', pg, + '--file', exp_path, + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "export failure with status {ret}". 
+ format(ret=proc.exitstatus)) + # remove + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--force', + '--op', 'remove', + '--pgid', pg, + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "remove failure with status {ret}". + format(ret=proc.exitstatus)) + # If there are at least 2 dead osds we might move the pg + if exp_osd != imp_osd: + # If pg isn't already on this osd, then we will move it there + proc = self.run_ceph_objectstore_tool( + imp_remote, + 'osd.%s' % imp_osd, + prefix + [ + '--data-path', FSPATH.format(id=imp_osd), + '--journal-path', JPATH.format(id=imp_osd), + '--op', 'list-pgs', + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "imp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + pgs = proc.stdout.getvalue().split('\n')[:-1] + if pg not in pgs: + self.log("Moving pg {pg} from osd.{fosd} to osd.{tosd}". + format(pg=pg, fosd=exp_osd, tosd=imp_osd)) + if imp_remote != exp_remote: + # Copy export file to the other machine + self.log("Transfer export file from {srem} to {trem}". + format(srem=exp_remote, trem=imp_remote)) + # just in case an upgrade make /var/log/ceph unreadable by non-root, + exp_remote.run(args=['sudo', 'chmod', '777', + '/var/log/ceph']) + imp_remote.run(args=['sudo', 'chmod', '777', + '/var/log/ceph']) + tmpexport = Remote.get_file(exp_remote, exp_host_path, + sudo=True) + if exp_host_path != exp_path: + # push to /var/log/ceph, then rename (we can't + # chmod 777 the /var/log/ceph/$fsid mountpoint) + Remote.put_file(imp_remote, tmpexport, exp_path) + imp_remote.run(args=[ + 'sudo', 'mv', exp_path, exp_host_path]) + else: + Remote.put_file(imp_remote, tmpexport, exp_host_path) + os.remove(tmpexport) + else: + # Can't move the pg after all + imp_osd = exp_osd + imp_remote = exp_remote + # import + proc = self.run_ceph_objectstore_tool( + imp_remote, 'osd.%s' % imp_osd, + [ + '--data-path', FSPATH.format(id=imp_osd), + '--journal-path', JPATH.format(id=imp_osd), + '--log-file=/var/log/ceph/objectstore_tool.$pid.log', + '--op', 'import', + '--file', exp_path, + ]) + if proc.exitstatus == 1: + bogosity = "The OSD you are using is older than the exported PG" + if bogosity in proc.stderr.getvalue(): + self.log("OSD older than exported PG" + "...ignored") + elif proc.exitstatus == 10: + self.log("Pool went away before processing an import" + "...ignored") + elif proc.exitstatus == 11: + self.log("Attempt to import an incompatible export" + "...ignored") + elif proc.exitstatus == 12: + # this should be safe to ignore because we only ever move 1 + # copy of the pg at a time, and merge is only initiated when + # all replicas are peered and happy. /me crosses fingers + self.log("PG merged on target" + "...ignored") + elif proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "import failure with status {ret}". + format(ret=proc.exitstatus)) + cmd = "sudo rm -f {file}".format(file=exp_host_path) + exp_remote.run(args=cmd) + if imp_remote != exp_remote: + imp_remote.run(args=cmd) + + def blackhole_kill_osd(self, osd=None): + """ + If all else fails, kill the osd. + :param osd: Osd to be killed. 
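kill_osd() exercises ceph-objectstore-tool by exporting a PG from one down OSD, removing it, and importing it on another. A self-contained sketch of the argument lists it assembles is below; the data/journal paths and IDs are placeholders and nothing is executed, this only shows the command shapes.

# Sketch of the three ceph-objectstore-tool invocations used when moving a PG
# between two down OSDs (export, remove, import).  Paths are placeholders.
def cot_cmds(pg, exp_osd, imp_osd,
             data_path='/var/lib/ceph/osd/ceph-{id}',
             journal_path='/var/lib/ceph/osd/ceph-{id}/journal',
             exp_file='/var/log/ceph/exp.{pg}.{id}'):
    common = ['--no-mon-config',
              '--log-file=/var/log/ceph/objectstore_tool.$pid.log']
    export_cmd = ['ceph-objectstore-tool'] + common + [
        '--data-path', data_path.format(id=exp_osd),
        '--journal-path', journal_path.format(id=exp_osd),
        '--op', 'export', '--pgid', pg,
        '--file', exp_file.format(pg=pg, id=exp_osd)]
    remove_cmd = ['ceph-objectstore-tool'] + common + [
        '--data-path', data_path.format(id=exp_osd),
        '--journal-path', journal_path.format(id=exp_osd),
        '--force', '--op', 'remove', '--pgid', pg]
    import_cmd = ['ceph-objectstore-tool',
        '--data-path', data_path.format(id=imp_osd),
        '--journal-path', journal_path.format(id=imp_osd),
        '--log-file=/var/log/ceph/objectstore_tool.$pid.log',
        '--op', 'import', '--file', exp_file.format(pg=pg, id=exp_osd)]
    return export_cmd, remove_cmd, import_cmd

for cmd in cot_cmds('1.2f', exp_osd=3, imp_osd=5):
    print(' '.join(cmd))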
+ """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Blackholing and then killing osd %s, live_osds are %s" % + (str(osd), str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.blackhole_kill_osd(osd) + + def revive_osd(self, osd=None, skip_admin_check=False): + """ + Revive the osd. + :param osd: Osd to be revived. + """ + if osd is None: + osd = random.choice(self.dead_osds) + self.log("Reviving osd %s" % (str(osd),)) + self.ceph_manager.revive_osd( + osd, + self.revive_timeout, + skip_admin_check=skip_admin_check) + self.dead_osds.remove(osd) + self.live_osds.append(osd) + if self.random_eio > 0 and osd == self.rerrosd: + self.ceph_manager.set_config(self.rerrosd, + filestore_debug_random_read_err = self.random_eio) + self.ceph_manager.set_config(self.rerrosd, + bluestore_debug_random_read_err = self.random_eio) + + + def out_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.in_osds) + self.log("Removing osd %s, in_osds are: %s" % + (str(osd), str(self.in_osds))) + self.ceph_manager.mark_out_osd(osd) + self.in_osds.remove(osd) + self.out_osds.append(osd) + + def in_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.out_osds) + if osd in self.dead_osds: + return self.revive_osd(osd) + self.log("Adding osd %s" % (str(osd),)) + self.out_osds.remove(osd) + self.in_osds.append(osd) + self.ceph_manager.mark_in_osd(osd) + self.log("Added osd %s" % (str(osd),)) + + def reweight_osd_or_by_util(self, osd=None): + """ + Reweight an osd that is in + :param osd: Osd to be marked. + """ + if osd is not None or random.choice([True, False]): + if osd is None: + osd = random.choice(self.in_osds) + val = random.uniform(.1, 1.0) + self.log("Reweighting osd %s to %s" % (str(osd), str(val))) + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(val)) + else: + # do it several times, the option space is large + for i in range(5): + options = { + 'max_change': random.choice(['0.05', '1.0', '3.0']), + 'overage': random.choice(['110', '1000']), + 'type': random.choice([ + 'reweight-by-utilization', + 'test-reweight-by-utilization']), + } + self.log("Reweighting by: %s"%(str(options),)) + self.ceph_manager.raw_cluster_cmd( + 'osd', + options['type'], + options['overage'], + options['max_change']) + + def primary_affinity(self, osd=None): + self.log("primary_affinity") + if osd is None: + osd = random.choice(self.in_osds) + if random.random() >= .5: + pa = random.random() + elif random.random() >= .5: + pa = 1 + else: + pa = 0 + self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa)) + self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', + str(osd), str(pa)) + + def thrash_cluster_full(self): + """ + Set and unset cluster full condition + """ + self.log('Setting full ratio to .001') + self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.001') + time.sleep(1) + self.log('Setting full ratio back to .95') + self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.95') + + def thrash_pg_upmap(self): + """ + Install or remove random pg_upmap entries in OSDMap + """ + self.log("thrash_pg_upmap") + from random import shuffle + out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty') + j = json.loads(out) + self.log('j is %s' % j) + try: + if random.random() >= .3: + pgs = self.ceph_manager.get_pg_stats() + if not pgs: + self.log('No 
pgs; doing nothing') + return + pg = random.choice(pgs) + pgid = str(pg['pgid']) + poolid = int(pgid.split('.')[0]) + sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid] + if len(sizes) == 0: + self.log('No pools; doing nothing') + return + n = sizes[0] + osds = self.in_osds + self.out_osds + shuffle(osds) + osds = osds[0:n] + self.log('Setting %s to %s' % (pgid, osds)) + cmd = ['osd', 'pg-upmap', pgid] + [str(x) for x in osds] + self.log('cmd %s' % cmd) + self.ceph_manager.raw_cluster_cmd(*cmd) + else: + m = j['pg_upmap'] + if len(m) > 0: + shuffle(m) + pg = m[0]['pgid'] + self.log('Clearing pg_upmap on %s' % pg) + self.ceph_manager.raw_cluster_cmd( + 'osd', + 'rm-pg-upmap', + pg) + else: + self.log('No pg_upmap entries; doing nothing') + except CommandFailedError: + self.log('Failed to rm-pg-upmap, ignoring') + + def thrash_pg_upmap_items(self): + """ + Install or remove random pg_upmap_items entries in OSDMap + """ + self.log("thrash_pg_upmap_items") + from random import shuffle + out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty') + j = json.loads(out) + self.log('j is %s' % j) + try: + if random.random() >= .3: + pgs = self.ceph_manager.get_pg_stats() + if not pgs: + self.log('No pgs; doing nothing') + return + pg = random.choice(pgs) + pgid = str(pg['pgid']) + poolid = int(pgid.split('.')[0]) + sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid] + if len(sizes) == 0: + self.log('No pools; doing nothing') + return + n = sizes[0] + osds = self.in_osds + self.out_osds + shuffle(osds) + osds = osds[0:n*2] + self.log('Setting %s to %s' % (pgid, osds)) + cmd = ['osd', 'pg-upmap-items', pgid] + [str(x) for x in osds] + self.log('cmd %s' % cmd) + self.ceph_manager.raw_cluster_cmd(*cmd) + else: + m = j['pg_upmap_items'] + if len(m) > 0: + shuffle(m) + pg = m[0]['pgid'] + self.log('Clearing pg_upmap on %s' % pg) + self.ceph_manager.raw_cluster_cmd( + 'osd', + 'rm-pg-upmap-items', + pg) + else: + self.log('No pg_upmap entries; doing nothing') + except CommandFailedError: + self.log('Failed to rm-pg-upmap-items, ignoring') + + def force_recovery(self): + """ + Force recovery on some of PGs + """ + backfill = random.random() >= 0.5 + j = self.ceph_manager.get_pgids_to_force(backfill) + if j: + try: + if backfill: + self.ceph_manager.raw_cluster_cmd('pg', 'force-backfill', *j) + else: + self.ceph_manager.raw_cluster_cmd('pg', 'force-recovery', *j) + except CommandFailedError: + self.log('Failed to force backfill|recovery, ignoring') + + + def cancel_force_recovery(self): + """ + Force recovery on some of PGs + """ + backfill = random.random() >= 0.5 + j = self.ceph_manager.get_pgids_to_cancel_force(backfill) + if j: + try: + if backfill: + self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-backfill', *j) + else: + self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-recovery', *j) + except CommandFailedError: + self.log('Failed to force backfill|recovery, ignoring') + + def force_cancel_recovery(self): + """ + Force or cancel forcing recovery + """ + if random.random() >= 0.4: + self.force_recovery() + else: + self.cancel_force_recovery() + + def all_up(self): + """ + Make sure all osds are up and not out. + """ + while len(self.dead_osds) > 0: + self.log("reviving osd") + self.revive_osd() + while len(self.out_osds) > 0: + self.log("inning osd") + self.in_osd() + + def all_up_in(self): + """ + Make sure all osds are up and fully in. 
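thrash_pg_upmap_items() samples twice the pool size worth of OSD ids because 'osd pg-upmap-items' takes (from_osd, to_osd) pairs. A minimal sketch of how that command line is assembled, with made-up ids:

# Sketch of the 'osd pg-upmap-items' command built above.  The pgid and OSD
# ids are made up; with pool size 3, six ids form three (from, to) mappings.
import random

pgid = '2.1a'
pool_size = 3
candidate_osds = [0, 1, 2, 3, 4, 5]
random.shuffle(candidate_osds)
mappings = candidate_osds[:pool_size * 2]

cmd = ['osd', 'pg-upmap-items', pgid] + [str(x) for x in mappings]
print(' '.join(cmd))    # e.g. osd pg-upmap-items 2.1a 4 0 5 2 1 3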
+ """ + self.all_up(); + for osd in self.live_osds: + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(1)) + self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', + str(osd), str(1)) + + def do_join(self): + """ + Break out of this Ceph loop + """ + self.stopping = True + self.thread.get() + if self.sighup_delay: + self.log("joining the do_sighup greenlet") + self.sighup_thread.get() + if self.optrack_toggle_delay: + self.log("joining the do_optrack_toggle greenlet") + self.optrack_toggle_thread.join() + if self.dump_ops_enable == "true": + self.log("joining the do_dump_ops greenlet") + self.dump_ops_thread.join() + if self.noscrub_toggle_delay: + self.log("joining the do_noscrub_toggle greenlet") + self.noscrub_toggle_thread.join() + + def grow_pool(self): + """ + Increase the size of the pool + """ + pool = self.ceph_manager.get_pool() + if pool is None: + return + self.log("Growing pool %s" % (pool,)) + if self.ceph_manager.expand_pool(pool, + self.config.get('pool_grow_by', 10), + self.max_pgs): + self.pools_to_fix_pgp_num.add(pool) + + def shrink_pool(self): + """ + Decrease the size of the pool + """ + pool = self.ceph_manager.get_pool() + if pool is None: + return + _ = self.ceph_manager.get_pool_pg_num(pool) + self.log("Shrinking pool %s" % (pool,)) + if self.ceph_manager.contract_pool( + pool, + self.config.get('pool_shrink_by', 10), + self.min_pgs): + self.pools_to_fix_pgp_num.add(pool) + + def fix_pgp_num(self, pool=None): + """ + Fix number of pgs in pool. + """ + if pool is None: + pool = self.ceph_manager.get_pool() + if not pool: + return + force = False + else: + force = True + self.log("fixing pg num pool %s" % (pool,)) + if self.ceph_manager.set_pool_pgpnum(pool, force): + self.pools_to_fix_pgp_num.discard(pool) + + def test_pool_min_size(self): + """ + Loop to selectively push PGs below their min_size and test that recovery + still occurs. + """ + self.log("test_pool_min_size") + self.all_up() + time.sleep(60) # buffer time for recovery to start. 
+ self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + minout = int(self.config.get("min_out", 1)) + minlive = int(self.config.get("min_live", 2)) + mindead = int(self.config.get("min_dead", 1)) + self.log("doing min_size thrashing") + self.ceph_manager.wait_for_clean(timeout=180) + assert self.ceph_manager.is_clean(), \ + 'not clean before minsize thrashing starts' + while not self.stopping: + # look up k and m from all the pools on each loop, in case it + # changes as the cluster runs + k = 0 + m = 99 + has_pools = False + pools_json = self.ceph_manager.get_osd_dump_json()['pools'] + + for pool_json in pools_json: + pool = pool_json['pool_name'] + has_pools = True + pool_type = pool_json['type'] # 1 for rep, 3 for ec + min_size = pool_json['min_size'] + self.log("pool {pool} min_size is {min_size}".format(pool=pool,min_size=min_size)) + try: + ec_profile = self.ceph_manager.get_pool_property(pool, 'erasure_code_profile') + if pool_type != PoolType.ERASURE_CODED: + continue + ec_profile = pool_json['erasure_code_profile'] + ec_profile_json = self.ceph_manager.raw_cluster_cmd( + 'osd', + 'erasure-code-profile', + 'get', + ec_profile, + '--format=json') + ec_json = json.loads(ec_profile_json) + local_k = int(ec_json['k']) + local_m = int(ec_json['m']) + self.log("pool {pool} local_k={k} local_m={m}".format(pool=pool, + k=local_k, m=local_m)) + if local_k > k: + self.log("setting k={local_k} from previous {k}".format(local_k=local_k, k=k)) + k = local_k + if local_m < m: + self.log("setting m={local_m} from previous {m}".format(local_m=local_m, m=m)) + m = local_m + except CommandFailedError: + self.log("failed to read erasure_code_profile. %s was likely removed", pool) + continue + + if has_pools : + self.log("using k={k}, m={m}".format(k=k,m=m)) + else: + self.log("No pools yet, waiting") + time.sleep(5) + continue + + if minout > len(self.out_osds): # kill OSDs and mark out + self.log("forced to out an osd") + self.kill_osd(mark_out=True) + continue + elif mindead > len(self.dead_osds): # kill OSDs but force timeout + self.log("forced to kill an osd") + self.kill_osd() + continue + else: # make mostly-random choice to kill or revive OSDs + minup = max(minlive, k) + rand_val = random.uniform(0, 1) + self.log("choosing based on number of live OSDs and rand val {rand}".\ + format(rand=rand_val)) + if len(self.live_osds) > minup+1 and rand_val < 0.5: + # chose to knock out as many OSDs as we can w/out downing PGs + + most_killable = min(len(self.live_osds) - minup, m) + self.log("chose to kill {n} OSDs".format(n=most_killable)) + for i in range(1, most_killable): + self.kill_osd(mark_out=True) + time.sleep(10) + # try a few times since there might be a concurrent pool + # creation or deletion + with safe_while( + sleep=25, tries=5, + action='check for active or peered') as proceed: + while proceed(): + if self.ceph_manager.all_active_or_peered(): + break + self.log('not all PGs are active or peered') + else: # chose to revive OSDs, bring up a random fraction of the dead ones + self.log("chose to revive osds") + for i in range(1, int(rand_val * len(self.dead_osds))): + self.revive_osd(i) + + # let PGs repair themselves or our next knockout might kill one + self.ceph_manager.wait_for_clean(timeout=self.config.get('timeout')) + + # / while not self.stopping + self.all_up_in() + + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + + def inject_pause(self, conf_key, duration, check_after, should_be_down): + """ + Pause injection testing. 
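test_pool_min_size() reads k and m from each erasure-code profile and uses m to cap how many OSDs it will down at once while keeping PGs serviceable. A runnable sketch of that bookkeeping against canned profile JSON (the JSON is sample data shaped like 'ceph osd erasure-code-profile get <name> --format=json' output):

# Sketch of the k/m bookkeeping in test_pool_min_size(): m bounds how many
# OSDs the thrasher downs in one pass, and k raises the floor of live OSDs.
import json

profile_json = '{"k": "4", "m": "2", "plugin": "jerasure"}'   # canned sample
ec = json.loads(profile_json)
k, m = int(ec['k']), int(ec['m'])

live_osds = list(range(8))
minlive = 2
minup = max(minlive, k)
most_killable = min(len(live_osds) - minup, m)
print('can kill up to %d OSDs at once' % most_killable)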
Check for osd being down when finished. + """ + the_one = random.choice(self.live_osds) + self.log("inject_pause on osd.{osd}".format(osd=the_one)) + self.log( + "Testing {key} pause injection for duration {duration}".format( + key=conf_key, + duration=duration + )) + self.log( + "Checking after {after}, should_be_down={shouldbedown}".format( + after=check_after, + shouldbedown=should_be_down + )) + self.ceph_manager.set_config(the_one, **{conf_key: duration}) + if not should_be_down: + return + time.sleep(check_after) + status = self.ceph_manager.get_osd_status() + assert the_one in status['down'] + time.sleep(duration - check_after + 20) + status = self.ceph_manager.get_osd_status() + assert not the_one in status['down'] + + def test_backfill_full(self): + """ + Test backfills stopping when the replica fills up. + + First, use injectfull admin command to simulate a now full + osd by setting it to 0 on all of the OSDs. + + Second, on a random subset, set + osd_debug_skip_full_check_in_backfill_reservation to force + the more complicated check in do_scan to be exercised. + + Then, verify that all backfillings stop. + """ + self.log("injecting backfill full") + for i in self.live_osds: + self.ceph_manager.set_config( + i, + osd_debug_skip_full_check_in_backfill_reservation= + random.choice(['false', 'true'])) + self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'backfillfull'], + check_status=True, timeout=30, stdout=DEVNULL) + for i in range(30): + status = self.ceph_manager.compile_pg_status() + if 'backfilling' not in status.keys(): + break + self.log( + "waiting for {still_going} backfillings".format( + still_going=status.get('backfilling'))) + time.sleep(1) + assert('backfilling' not in self.ceph_manager.compile_pg_status().keys()) + for i in self.live_osds: + self.ceph_manager.set_config( + i, + osd_debug_skip_full_check_in_backfill_reservation='false') + self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'none'], + check_status=True, timeout=30, stdout=DEVNULL) + + + def generate_random_sharding(self): + prefixes = [ + 'm','O','P','L' + ] + new_sharding = '' + for prefix in prefixes: + choose = random.choice([False, True]) + if not choose: + continue + if new_sharding != '': + new_sharding = new_sharding + ' ' + columns = random.randint(1, 5) + do_hash = random.choice([False, True]) + if do_hash: + low_hash = random.choice([0, 5, 8]) + do_high_hash = random.choice([False, True]) + if do_high_hash: + high_hash = random.choice([8, 16, 30]) + low_hash + new_sharding = new_sharding + prefix + '(' + str(columns) + ',' + str(low_hash) + '-' + str(high_hash) + ')' + else: + new_sharding = new_sharding + prefix + '(' + str(columns) + ',' + str(low_hash) + '-)' + else: + if columns == 1: + new_sharding = new_sharding + prefix + else: + new_sharding = new_sharding + prefix + '(' + str(columns) + ')' + return new_sharding + + def test_bluestore_reshard_action(self): + """ + Test if resharding of bluestore works properly. + If bluestore is not used, or bluestore is in version that + does not support sharding, skip. 
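generate_random_sharding() emits a space-separated list of RocksDB column prefixes, optionally with a '(columns,low-high)' hash-range suffix, and that string is later handed to ceph-bluestore-tool. An illustrative spec and the reshard invocation it would feed; the concrete spec and the OSD path are placeholders, only the format comes from the generator above.

# Example sharding description in the format produced by
# generate_random_sharding(); the exact spec below is invented for illustration.
example_sharding = 'm(3) O(3,0-13) L P'

reshard_cmd = [
    'ceph-bluestore-tool',
    '--path', '/var/lib/ceph/osd/ceph-0',   # placeholder OSD path
    '--sharding', example_sharding,
    'reshard',
]
print(' '.join(reshard_cmd))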
+ """ + + osd = random.choice(self.dead_osds) + remote = self.ceph_manager.find_remote('osd', osd) + FSPATH = self.ceph_manager.get_filepath() + + prefix = [ + '--no-mon-config', + '--log-file=/var/log/ceph/bluestore_tool.$pid.log', + '--log-level=10', + '--path', FSPATH.format(id=osd) + ] + + # sanity check if bluestore-tool accessible + self.log('checking if target objectstore is bluestore on osd.%s' % osd) + cmd = prefix + [ + 'show-label' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool access failed.") + + # check if sharding is possible + self.log('checking if target bluestore supports sharding on osd.%s' % osd) + cmd = prefix + [ + 'show-sharding' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + self.log("Unable to test resharding, " + "ceph-bluestore-tool does not support it.") + return + + # now go for reshard to something else + self.log('applying new sharding to bluestore on osd.%s' % osd) + new_sharding = self.config.get('bluestore_new_sharding','random') + + if new_sharding == 'random': + self.log('generate random sharding') + new_sharding = self.generate_random_sharding() + + self.log("applying new sharding: " + new_sharding) + cmd = prefix + [ + '--sharding', new_sharding, + 'reshard' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool resharding failed.") + + # now do fsck to + self.log('running fsck to verify new sharding on osd.%s' % osd) + cmd = prefix + [ + 'fsck' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool fsck failed.") + self.log('resharding successfully completed') + + def test_bluestore_reshard(self): + """ + 1) kills an osd + 2) reshards bluestore on killed osd + 3) revives the osd + """ + self.log('test_bluestore_reshard started') + self.kill_osd(mark_down=True, mark_out=True) + self.test_bluestore_reshard_action() + self.revive_osd() + self.log('test_bluestore_reshard completed') + + + def test_map_discontinuity(self): + """ + 1) Allows the osds to recover + 2) kills an osd + 3) allows the remaining osds to recover + 4) waits for some time + 5) revives the osd + This sequence should cause the revived osd to have to handle + a map gap since the mons would have trimmed + """ + self.log("test_map_discontinuity") + while len(self.in_osds) < (self.minin + 1): + self.in_osd() + self.log("Waiting for recovery") + self.ceph_manager.wait_for_all_osds_up( + timeout=self.config.get('timeout') + ) + # now we wait 20s for the pg status to change, if it takes longer, + # the test *should* fail! + time.sleep(20) + self.ceph_manager.wait_for_clean( + timeout=self.config.get('timeout') + ) + + # now we wait 20s for the backfill replicas to hear about the clean + time.sleep(20) + self.log("Recovered, killing an osd") + self.kill_osd(mark_down=True, mark_out=True) + self.log("Waiting for clean again") + self.ceph_manager.wait_for_clean( + timeout=self.config.get('timeout') + ) + self.log("Waiting for trim") + time.sleep(int(self.config.get("map_discontinuity_sleep_time", 40))) + self.revive_osd() + + def choose_action(self): + """ + Random action selector. 
+ """ + chance_down = self.config.get('chance_down', 0.4) + _ = self.config.get('chance_test_min_size', 0) + chance_test_backfill_full = \ + self.config.get('chance_test_backfill_full', 0) + if isinstance(chance_down, int): + chance_down = float(chance_down) / 100 + minin = self.minin + minout = int(self.config.get("min_out", 0)) + minlive = int(self.config.get("min_live", 2)) + mindead = int(self.config.get("min_dead", 0)) + + self.log('choose_action: min_in %d min_out ' + '%d min_live %d min_dead %d ' + 'chance_down %.2f' % + (minin, minout, minlive, mindead, chance_down)) + actions = [] + if len(self.in_osds) > minin: + actions.append((self.out_osd, 1.0,)) + if len(self.live_osds) > minlive and chance_down > 0: + actions.append((self.kill_osd, chance_down,)) + if len(self.out_osds) > minout: + actions.append((self.in_osd, 1.7,)) + if len(self.dead_osds) > mindead: + actions.append((self.revive_osd, 1.0,)) + if self.config.get('thrash_primary_affinity', True): + actions.append((self.primary_affinity, 1.0,)) + actions.append((self.reweight_osd_or_by_util, + self.config.get('reweight_osd', .5),)) + actions.append((self.grow_pool, + self.config.get('chance_pgnum_grow', 0),)) + actions.append((self.shrink_pool, + self.config.get('chance_pgnum_shrink', 0),)) + actions.append((self.fix_pgp_num, + self.config.get('chance_pgpnum_fix', 0),)) + actions.append((self.test_pool_min_size, + self.config.get('chance_test_min_size', 0),)) + actions.append((self.test_backfill_full, + chance_test_backfill_full,)) + if self.chance_thrash_cluster_full > 0: + actions.append((self.thrash_cluster_full, self.chance_thrash_cluster_full,)) + if self.chance_thrash_pg_upmap > 0: + actions.append((self.thrash_pg_upmap, self.chance_thrash_pg_upmap,)) + if self.chance_thrash_pg_upmap_items > 0: + actions.append((self.thrash_pg_upmap_items, self.chance_thrash_pg_upmap_items,)) + if self.chance_force_recovery > 0: + actions.append((self.force_cancel_recovery, self.chance_force_recovery)) + + for key in ['heartbeat_inject_failure', 'filestore_inject_stall']: + for scenario in [ + (lambda: + self.inject_pause(key, + self.config.get('pause_short', 3), + 0, + False), + self.config.get('chance_inject_pause_short', 1),), + (lambda: + self.inject_pause(key, + self.config.get('pause_long', 80), + self.config.get('pause_check_after', 70), + True), + self.config.get('chance_inject_pause_long', 0),)]: + actions.append(scenario) + + # only consider resharding if objectstore is bluestore + cluster_name = self.ceph_manager.cluster + cluster = self.ceph_manager.ctx.ceph[cluster_name] + if cluster.conf.get('osd', {}).get('osd objectstore', 'bluestore') == 'bluestore': + actions.append((self.test_bluestore_reshard, + self.config.get('chance_bluestore_reshard', 0),)) + + total = sum([y for (x, y) in actions]) + val = random.uniform(0, total) + for (action, prob) in actions: + if val < prob: + return action + val -= prob + return None + + def do_thrash(self): + """ + _do_thrash() wrapper. + """ + try: + self._do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. + + @log_exc + def do_sighup(self): + """ + Loops and sends signal.SIGHUP to a random live osd. + + Loop delay is controlled by the config value sighup_delay. 
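choose_action() ends with a single weighted random pick over the accumulated (action, weight) list. A minimal, self-contained sketch of that selection logic:

# Weighted pick as done at the end of choose_action(): one uniform draw over
# the total weight, then walk the list subtracting weights until it lands.
import random

def pick(actions):
    """actions: list of (callable, weight) pairs, as choose_action builds."""
    total = sum(weight for _, weight in actions)
    val = random.uniform(0, total)
    for action, weight in actions:
        if val < weight:
            return action
        val -= weight
    return None     # only if the draw hits the exact total, matching the original

actions = [(lambda: 'out_osd', 1.0),
           (lambda: 'kill_osd', 0.4),
           (lambda: 'revive_osd', 1.0)]
chosen = pick(actions)
print(chosen() if chosen else None)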
+ """ + delay = float(self.sighup_delay) + self.log("starting do_sighup with a delay of {0}".format(delay)) + while not self.stopping: + osd = random.choice(self.live_osds) + self.ceph_manager.signal_osd(osd, signal.SIGHUP, silent=True) + time.sleep(delay) + + @log_exc + def do_optrack_toggle(self): + """ + Loops and toggle op tracking to all osds. + + Loop delay is controlled by the config value optrack_toggle_delay. + """ + delay = float(self.optrack_toggle_delay) + osd_state = "true" + self.log("starting do_optrack_toggle with a delay of {0}".format(delay)) + while not self.stopping: + if osd_state == "true": + osd_state = "false" + else: + osd_state = "true" + try: + self.ceph_manager.inject_args('osd', '*', + 'osd_enable_op_tracker', + osd_state) + except CommandFailedError: + self.log('Failed to tell all osds, ignoring') + gevent.sleep(delay) + + @log_exc + def do_dump_ops(self): + """ + Loops and does op dumps on all osds + """ + self.log("starting do_dump_ops") + while not self.stopping: + for osd in self.live_osds: + # Ignore errors because live_osds is in flux + self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'], + check_status=False, timeout=30, stdout=DEVNULL) + self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'], + check_status=False, timeout=30, stdout=DEVNULL) + self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'], + check_status=False, timeout=30, stdout=DEVNULL) + gevent.sleep(0) + + @log_exc + def do_noscrub_toggle(self): + """ + Loops and toggle noscrub flags + + Loop delay is controlled by the config value noscrub_toggle_delay. + """ + delay = float(self.noscrub_toggle_delay) + scrub_state = "none" + self.log("starting do_noscrub_toggle with a delay of {0}".format(delay)) + while not self.stopping: + if scrub_state == "none": + self.ceph_manager.raw_cluster_cmd('osd', 'set', 'noscrub') + scrub_state = "noscrub" + elif scrub_state == "noscrub": + self.ceph_manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') + scrub_state = "both" + elif scrub_state == "both": + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + scrub_state = "nodeep-scrub" + else: + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') + scrub_state = "none" + gevent.sleep(delay) + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') + + @log_exc + def _do_thrash(self): + """ + Loop to select random actions to thrash ceph manager with. 
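do_noscrub_toggle() cycles the cluster through four scrub-flag states so scrubbing is repeatedly disabled and re-enabled while thrashing runs. The same state machine written out as a table, with raw_cluster_cmd replaced by print() so a full cycle can be traced without a cluster:

# State machine mirrored from do_noscrub_toggle(): state -> (verb, flag, next).
def raw_cluster_cmd(*args):
    print('ceph ' + ' '.join(args))

transitions = {
    'none':         ('set',   'noscrub',      'noscrub'),
    'noscrub':      ('set',   'nodeep-scrub', 'both'),
    'both':         ('unset', 'noscrub',      'nodeep-scrub'),
    'nodeep-scrub': ('unset', 'nodeep-scrub', 'none'),
}

state = 'none'
for _ in range(4):                      # one full cycle of the four states
    verb, flag, state = transitions[state]
    raw_cluster_cmd('osd', verb, flag)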
+ """ + cleanint = self.config.get("clean_interval", 60) + scrubint = self.config.get("scrub_interval", -1) + maxdead = self.config.get("max_dead", 0) + delay = self.config.get("op_delay", 5) + self.rerrosd = self.live_osds[0] + if self.random_eio > 0: + self.ceph_manager.inject_args('osd', self.rerrosd, + 'filestore_debug_random_read_err', + self.random_eio) + self.ceph_manager.inject_args('osd', self.rerrosd, + 'bluestore_debug_random_read_err', + self.random_eio) + self.log("starting do_thrash") + while not self.stopping: + to_log = [str(x) for x in ["in_osds: ", self.in_osds, + "out_osds: ", self.out_osds, + "dead_osds: ", self.dead_osds, + "live_osds: ", self.live_osds]] + self.log(" ".join(to_log)) + if random.uniform(0, 1) < (float(delay) / cleanint): + while len(self.dead_osds) > maxdead: + self.revive_osd() + for osd in self.in_osds: + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(1)) + if random.uniform(0, 1) < float( + self.config.get('chance_test_map_discontinuity', 0)) \ + and len(self.live_osds) > 5: # avoid m=2,k=2 stall, w/ some buffer for crush being picky + self.test_map_discontinuity() + else: + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + time.sleep(self.clean_wait) + if scrubint > 0: + if random.uniform(0, 1) < (float(delay) / scrubint): + self.log('Scrubbing while thrashing being performed') + Scrubber(self.ceph_manager, self.config) + self.choose_action()() + time.sleep(delay) + self.all_up() + if self.random_eio > 0: + self.ceph_manager.inject_args('osd', self.rerrosd, + 'filestore_debug_random_read_err', '0.0') + self.ceph_manager.inject_args('osd', self.rerrosd, + 'bluestore_debug_random_read_err', '0.0') + for pool in list(self.pools_to_fix_pgp_num): + if self.ceph_manager.get_pool_pg_num(pool) > 0: + self.fix_pgp_num(pool) + self.pools_to_fix_pgp_num.clear() + for service, opt, saved_value in self.saved_options: + self.ceph_manager.inject_args(service, '*', opt, saved_value) + self.saved_options = [] + self.all_up_in() + + +class ObjectStoreTool: + + def __init__(self, manager, pool, **kwargs): + self.manager = manager + self.pool = pool + self.osd = kwargs.get('osd', None) + self.object_name = kwargs.get('object_name', None) + self.do_revive = kwargs.get('do_revive', True) + if self.osd and self.pool and self.object_name: + if self.osd == "primary": + self.osd = self.manager.get_object_primary(self.pool, + self.object_name) + assert self.osd is not None + if self.object_name: + self.pgid = self.manager.get_object_pg_with_shard(self.pool, + self.object_name, + self.osd) + self.remote = next(iter(self.manager.ctx.\ + cluster.only('osd.{o}'.format(o=self.osd)).remotes.keys())) + path = self.manager.get_filepath().format(id=self.osd) + self.paths = ("--data-path {path} --journal-path {path}/journal". + format(path=path)) + + def build_cmd(self, options, args, stdin): + lines = [] + if self.object_name: + lines.append("object=$(sudo adjust-ulimits ceph-objectstore-tool " + "{paths} --pgid {pgid} --op list |" + "grep '\"oid\":\"{name}\"')". + format(paths=self.paths, + pgid=self.pgid, + name=self.object_name)) + args = '"$object" ' + args + options += " --pgid {pgid}".format(pgid=self.pgid) + cmd = ("sudo adjust-ulimits ceph-objectstore-tool {paths} {options} {args}". + format(paths=self.paths, + args=args, + options=options)) + if stdin: + cmd = ("echo {payload} | base64 --decode | {cmd}". 
+ format(payload=base64.encode(stdin), + cmd=cmd)) + lines.append(cmd) + return "\n".join(lines) + + def run(self, options, args): + self.manager.kill_osd(self.osd) + cmd = self.build_cmd(options, args, None) + self.manager.log(cmd) + try: + proc = self.remote.run(args=['bash', '-e', '-x', '-c', cmd], + check_status=False, + stdout=BytesIO(), + stderr=BytesIO()) + proc.wait() + if proc.exitstatus != 0: + self.manager.log("failed with " + str(proc.exitstatus)) + error = proc.stdout.getvalue().decode() + " " + \ + proc.stderr.getvalue().decode() + raise Exception(error) + finally: + if self.do_revive: + self.manager.revive_osd(self.osd) + self.manager.wait_till_osd_is_up(self.osd, 300) + + +# XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of +# the same name. +class CephManager: + """ + Ceph manager object. + Contains several local functions that form a bulk of this module. + + :param controller: the remote machine where the Ceph commands should be + executed + :param ctx: the cluster context + :param config: path to Ceph config file + :param logger: for logging messages + :param cluster: name of the Ceph cluster + """ + + def __init__(self, controller, ctx=None, config=None, logger=None, + cluster='ceph', cephadm=False, rook=False) -> None: + self.lock = threading.RLock() + self.ctx = ctx + self.config = config + self.controller = controller + self.next_pool_id = 0 + self.cluster = cluster + + if (logger): + self.log = lambda x: logger.info(x) + else: + def tmp(x): + """ + implement log behavior. + """ + print(x) + self.log = tmp + + if self.config is None: + self.config = dict() + + # NOTE: These variables are meant to be overriden by vstart_runner.py. + self.rook = rook + self.cephadm = cephadm + self.testdir = teuthology.get_testdir(self.ctx) + # prefix args for ceph cmds to be executed + pre = ['adjust-ulimits', 'ceph-coverage', + f'{self.testdir}/archive/coverage'] + self.CEPH_CMD = ['sudo'] + pre + ['timeout', '120', 'ceph', + '--cluster', self.cluster] + self.RADOS_CMD = pre + ['rados', '--cluster', self.cluster] + self.run_ceph_w_prefix = ['sudo', 'daemon-helper', 'kill', 'ceph', + '--cluster', self.cluster] + + pools = self.list_pools() + self.pools = {} + for pool in pools: + # we may race with a pool deletion; ignore failures here + try: + self.pools[pool] = self.get_pool_int_property(pool, 'pg_num') + except CommandFailedError: + self.log('Failed to get pg_num from pool %s, ignoring' % pool) + + def ceph(self, cmd, **kwargs): + """ + Simple Ceph admin command wrapper around run_cluster_cmd. + """ + + kwargs.pop('args', None) + args = shlex.split(cmd) + stdout = kwargs.pop('stdout', StringIO()) + stderr = kwargs.pop('stderr', StringIO()) + return self.run_cluster_cmd(args=args, stdout=stdout, stderr=stderr, **kwargs) + + def run_cluster_cmd(self, **kwargs): + """ + Run a Ceph command and return the object representing the process + for the command. 
+ + Accepts arguments same as that of teuthology.orchestra.run.run() + """ + if isinstance(kwargs['args'], str): + kwargs['args'] = shlex.split(kwargs['args']) + elif isinstance(kwargs['args'], tuple): + kwargs['args'] = list(kwargs['args']) + + prefixcmd = [] + timeoutcmd = kwargs.pop('timeoutcmd', None) + if timeoutcmd is not None: + prefixcmd += ['timeout', str(timeoutcmd)] + + if self.cephadm: + prefixcmd += ['ceph'] + cmd = prefixcmd + list(kwargs['args']) + return shell(self.ctx, self.cluster, self.controller, + args=cmd, + stdout=StringIO(), + check_status=kwargs.get('check_status', True)) + elif self.rook: + prefixcmd += ['ceph'] + cmd = prefixcmd + list(kwargs['args']) + return toolbox(self.ctx, self.cluster, + args=cmd, + stdout=StringIO(), + check_status=kwargs.get('check_status', True)) + else: + kwargs['args'] = prefixcmd + self.CEPH_CMD + kwargs['args'] + return self.controller.run(**kwargs) + + def raw_cluster_cmd(self, *args, **kwargs) -> str: + """ + Start ceph on a raw cluster. Return count + """ + if kwargs.get('args') is None and args: + kwargs['args'] = args + kwargs['stdout'] = kwargs.pop('stdout', StringIO()) + return self.run_cluster_cmd(**kwargs).stdout.getvalue() + + def raw_cluster_cmd_result(self, *args, **kwargs): + """ + Start ceph on a cluster. Return success or failure information. + """ + if kwargs.get('args') is None and args: + kwargs['args'] = args + kwargs['check_status'] = False + return self.run_cluster_cmd(**kwargs).exitstatus + + def get_keyring(self, client_id): + """ + Return keyring for the given client. + + :param client_id: str + :return keyring: str + """ + if client_id.find('client.') != -1: + client_id = client_id.replace('client.', '') + + keyring = self.run_cluster_cmd(args=f'auth get client.{client_id}', + stdout=StringIO()).\ + stdout.getvalue().strip() + + assert isinstance(keyring, str) and keyring != '' + return keyring + + def run_ceph_w(self, watch_channel=None): + """ + Execute "ceph -w" in the background with stdout connected to a BytesIO, + and return the RemoteProcess. + + :param watch_channel: Specifies the channel to be watched. This can be + 'cluster', 'audit', ... + :type watch_channel: str + """ + args = self.run_ceph_w_prefix + ['-w'] + if watch_channel is not None: + args.append("--watch-channel") + args.append(watch_channel) + return self.controller.run(args=args, wait=False, stdout=StringIO(), stdin=run.PIPE) + + def get_mon_socks(self): + """ + Get monitor sockets. + + :return socks: tuple of strings; strings are individual sockets. + """ + from json import loads + + output = loads(self.raw_cluster_cmd(['--format=json', 'mon', 'dump'])) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def get_msgrv1_mon_socks(self): + """ + Get monitor sockets that use msgrv1 to operate. + + :return socks: tuple of strings; strings are individual sockets. + """ + from json import loads + + output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump')) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + if addrvec_mem['type'] == 'v1': + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def get_msgrv2_mon_socks(self): + """ + Get monitor sockets that use msgrv2 to operate. + + :return socks: tuple of strings; strings are individual sockets. 
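Most callers of raw_cluster_cmd() ask for '--format=json' and parse the returned stdout string, as get_mon_socks() does above. A self-contained sketch of that pattern with a stand-in manager that returns canned monitor output so it runs anywhere:

# Stand-in for CephManager that returns canned 'mon dump' JSON; the parsing
# below mirrors get_mon_socks().
import json

class FakeManager:
    def raw_cluster_cmd(self, *args):
        return json.dumps({'mons': [
            {'public_addrs': {'addrvec': [
                {'type': 'v2', 'addr': '10.0.0.1:3300'},
                {'type': 'v1', 'addr': '10.0.0.1:6789'}]}}]})

def get_mon_socks(manager):
    output = json.loads(manager.raw_cluster_cmd('--format=json', 'mon', 'dump'))
    return tuple(a['addr']
                 for mon in output['mons']
                 for a in mon['public_addrs']['addrvec'])

print(get_mon_socks(FakeManager()))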
+ """ + from json import loads + + output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump')) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + if addrvec_mem['type'] == 'v2': + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def flush_pg_stats(self, osds, no_wait=None, wait_for_mon=300): + """ + Flush pg stats from a list of OSD ids, ensuring they are reflected + all the way to the monitor. Luminous and later only. + + :param osds: list of OSDs to flush + :param no_wait: list of OSDs not to wait for seq id. by default, we + wait for all specified osds, but some of them could be + moved out of osdmap, so we cannot get their updated + stat seq from monitor anymore. in that case, you need + to pass a blocklist. + :param wait_for_mon: wait for mon to be synced with mgr. 0 to disable + it. (5 min by default) + """ + if no_wait is None: + no_wait = [] + + def flush_one_osd(osd: int, wait_for_mon: int): + need = int(self.raw_cluster_cmd('tell', 'osd.%d' % osd, 'flush_pg_stats')) + if not wait_for_mon: + return + if osd in no_wait: + return + got = 0 + while wait_for_mon > 0: + got = int(self.raw_cluster_cmd('osd', 'last-stat-seq', 'osd.%d' % osd)) + self.log('need seq {need} got {got} for osd.{osd}'.format( + need=need, got=got, osd=osd)) + if got >= need: + break + A_WHILE = 1 + time.sleep(A_WHILE) + wait_for_mon -= A_WHILE + else: + raise Exception('timed out waiting for mon to be updated with ' + 'osd.{osd}: {got} < {need}'. + format(osd=osd, got=got, need=need)) + + with parallel() as p: + for osd in osds: + p.spawn(flush_one_osd, osd, wait_for_mon) + + def flush_all_pg_stats(self): + self.flush_pg_stats(range(len(self.get_osd_dump()))) + + def do_rados(self, cmd, pool=None, namespace=None, remote=None, **kwargs): + """ + Execute a remote rados command. + """ + if remote is None: + remote = self.controller + + pre = self.RADOS_CMD + [] # deep-copying! + if pool is not None: + pre += ['--pool', pool] + if namespace is not None: + pre += ['--namespace', namespace] + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=True, + **kwargs + ) + return proc + + def rados_write_objects(self, pool, num_objects, size, + timelimit, threads, cleanup=False): + """ + Write rados objects + Threads not used yet. + """ + args = [ + '--num-objects', num_objects, + '-b', size, + 'bench', timelimit, + 'write' + ] + if not cleanup: + args.append('--no-cleanup') + return self.do_rados(map(str, args), pool=pool) + + def do_put(self, pool, obj, fname, namespace=None): + """ + Implement rados put operation + """ + args = ['put', obj, fname] + return self.do_rados( + args, + check_status=False, + pool=pool, + namespace=namespace + ).exitstatus + + def do_get(self, pool, obj, fname='/dev/null', namespace=None): + """ + Implement rados get operation + """ + args = ['get', obj, fname] + return self.do_rados( + args, + check_status=False, + pool=pool, + namespace=namespace, + ).exitstatus + + def do_rm(self, pool, obj, namespace=None): + """ + Implement rados rm operation + """ + args = ['rm', obj] + return self.do_rados( + args, + check_status=False, + pool=pool, + namespace=namespace + ).exitstatus + + def osd_admin_socket(self, osd_id, command, check_status=True, timeout=0, stdout=None): + if stdout is None: + stdout = StringIO() + return self.admin_socket('osd', osd_id, command, check_status, timeout, stdout) + + def find_remote(self, service_type, service_id): + """ + Get the Remote for the host where a particular service runs. 
+ + :param service_type: 'mds', 'osd', 'client' + :param service_id: The second part of a role, e.g. '0' for + the role 'client.0' + :return: a Remote instance for the host where the + requested role is placed + """ + return get_remote(self.ctx, self.cluster, + service_type, service_id) + + def admin_socket(self, service_type, service_id, + command, check_status=True, timeout=0, stdout=None): + """ + Remotely start up ceph specifying the admin socket + :param command: a list of words to use as the command + to the admin socket + """ + if stdout is None: + stdout = StringIO() + + remote = self.find_remote(service_type, service_id) + + if self.cephadm: + return shell( + self.ctx, self.cluster, remote, + args=[ + 'ceph', 'daemon', '%s.%s' % (service_type, service_id), + ] + command, + stdout=stdout, + wait=True, + check_status=check_status, + ) + if self.rook: + assert False, 'not implemented' + + args = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + f'{self.testdir}/archive/coverage', + 'timeout', + str(timeout), + 'ceph', + '--cluster', + self.cluster, + '--admin-daemon', + '/var/run/ceph/{cluster}-{type}.{id}.asok'.format( + cluster=self.cluster, + type=service_type, + id=service_id), + ] + args.extend(command) + return remote.run( + args=args, + stdout=stdout, + wait=True, + check_status=check_status + ) + + def objectstore_tool(self, pool, options, args, **kwargs): + return ObjectStoreTool(self, pool, **kwargs).run(options, args) + + def get_pgid(self, pool, pgnum): + """ + :param pool: pool name + :param pgnum: pg number + :returns: a string representing this pg. + """ + poolnum = self.get_pool_num(pool) + pg_str = "{poolnum}.{pgnum}".format( + poolnum=poolnum, + pgnum=pgnum) + return pg_str + + def get_pg_replica(self, pool, pgnum): + """ + get replica for pool, pgnum (e.g. (data, 0)->0 + """ + pg_str = self.get_pgid(pool, pgnum) + output = self.raw_cluster_cmd("pg", "map", pg_str, '--format=json') + j = json.loads('\n'.join(output.split('\n')[1:])) + return int(j['acting'][-1]) + assert False + + def wait_for_pg_stats(func): + # both osd_mon_report_interval and mgr_stats_period are 5 seconds + # by default, and take the faulty injection in ms into consideration, + # 12 seconds are more than enough + delays = [1, 1, 2, 3, 5, 8, 13, 0] + @wraps(func) + def wrapper(self, *args, **kwargs): + exc = None + for delay in delays: + try: + return func(self, *args, **kwargs) + except AssertionError as e: + time.sleep(delay) + exc = e + raise exc + return wrapper + + def get_pg_primary(self, pool, pgnum): + """ + get primary for pool, pgnum (e.g. (data, 0)->0 + """ + pg_str = self.get_pgid(pool, pgnum) + output = self.raw_cluster_cmd("pg", "map", pg_str, '--format=json') + j = json.loads('\n'.join(output.split('\n')[1:])) + return int(j['acting'][0]) + assert False + + def get_pool_num(self, pool): + """ + get number for pool (e.g., data -> 2) + """ + return int(self.get_pool_dump(pool)['pool']) + + def list_pools(self): + """ + list all pool names + """ + osd_dump = self.get_osd_dump_json() + self.log(osd_dump['pools']) + return [str(i['pool_name']) for i in osd_dump['pools']] + + def clear_pools(self): + """ + remove all pools + """ + [self.remove_pool(i) for i in self.list_pools()] + + def kick_recovery_wq(self, osdnum): + """ + Run kick_recovery_wq on cluster. 
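For the non-cephadm case, admin_socket() talks to the daemon socket at /var/run/ceph/<cluster>-<type>.<id>.asok. A simplified sketch of the argument list it builds; the coverage and ulimit wrappers used by the real helper are omitted here, and the values are placeholders.

# Simplified admin-socket command builder following the path convention above.
def admin_socket_args(cluster, service_type, service_id, command, timeout=0):
    sock = '/var/run/ceph/{cluster}-{type}.{id}.asok'.format(
        cluster=cluster, type=service_type, id=service_id)
    return (['sudo', 'timeout', str(timeout),
             'ceph', '--cluster', cluster,
             '--admin-daemon', sock] + list(command))

print(' '.join(admin_socket_args('ceph', 'osd', '0', ['dump_ops_in_flight'])))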
+ """ + return self.raw_cluster_cmd( + 'tell', "osd.%d" % (int(osdnum),), + 'debug', + 'kick_recovery_wq', + '0') + + def wait_run_admin_socket(self, service_type, + service_id, args=['version'], timeout=75, stdout=None): + """ + If osd_admin_socket call succeeds, return. Otherwise wait + five seconds and try again. + """ + if stdout is None: + stdout = StringIO() + tries = 0 + while True: + proc = self.admin_socket(service_type, service_id, + args, check_status=False, stdout=stdout) + if proc.exitstatus == 0: + return proc + else: + tries += 1 + if (tries * 5) > timeout: + raise Exception('timed out waiting for admin_socket ' + 'to appear after {type}.{id} restart'. + format(type=service_type, + id=service_id)) + self.log("waiting on admin_socket for {type}-{id}, " + "{command}".format(type=service_type, + id=service_id, + command=args)) + time.sleep(5) + + def get_pool_dump(self, pool): + """ + get the osd dump part of a pool + """ + osd_dump = self.get_osd_dump_json() + for i in osd_dump['pools']: + if i['pool_name'] == pool: + return i + assert False + + def get_config(self, service_type, service_id, name): + """ + :param node: like 'mon.a' + :param name: the option name + """ + proc = self.wait_run_admin_socket(service_type, service_id, + ['config', 'show']) + j = json.loads(proc.stdout.getvalue()) + return j[name] + + def inject_args(self, service_type, service_id, name, value): + whom = '{0}.{1}'.format(service_type, service_id) + if isinstance(value, bool): + value = 'true' if value else 'false' + opt_arg = '--{name}={value}'.format(name=name, value=value) + self.raw_cluster_cmd('--', 'tell', whom, 'injectargs', opt_arg) + + def set_config(self, osdnum, **argdict): + """ + :param osdnum: osd number + :param argdict: dictionary containing values to set. + """ + for k, v in argdict.items(): + self.wait_run_admin_socket( + 'osd', osdnum, + ['config', 'set', str(k), str(v)]) + + def raw_cluster_status(self): + """ + Get status from cluster + """ + status = self.raw_cluster_cmd('status', '--format=json') + return json.loads(status) + + def raw_osd_status(self): + """ + Get osd status from cluster + """ + return self.raw_cluster_cmd('osd', 'dump') + + def get_osd_status(self): + """ + Get osd statuses sorted by states that the osds are in. + """ + osd_lines = list(filter( + lambda x: x.startswith('osd.') and (("up" in x) or ("down" in x)), + self.raw_osd_status().split('\n'))) + self.log(osd_lines) + in_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " in " in x, osd_lines)] + out_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " out " in x, osd_lines)] + up_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " up " in x, osd_lines)] + down_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " down " in x, osd_lines)] + dead_osds = [int(x.id_) + for x in filter(lambda x: + not x.running(), + self.ctx.daemons. 
+ iter_daemons_of_role('osd', self.cluster))] + live_osds = [int(x.id_) for x in + filter(lambda x: + x.running(), + self.ctx.daemons.iter_daemons_of_role('osd', + self.cluster))] + return {'in': in_osds, 'out': out_osds, 'up': up_osds, + 'down': down_osds, 'dead': dead_osds, 'live': live_osds, + 'raw': osd_lines} + + def get_num_pgs(self): + """ + Check cluster status for the number of pgs + """ + status = self.raw_cluster_status() + self.log(status) + return status['pgmap']['num_pgs'] + + def create_erasure_code_profile(self, profile_name, profile): + """ + Create an erasure code profile name that can be used as a parameter + when creating an erasure coded pool. + """ + with self.lock: + args = cmd_erasure_code_profile(profile_name, profile) + self.raw_cluster_cmd(*args) + + def create_pool_with_unique_name(self, pg_num=16, + erasure_code_profile_name=None, + min_size=None, + erasure_code_use_overwrites=False): + """ + Create a pool named unique_pool_X where X is unique. + """ + name = "" + with self.lock: + name = "unique_pool_%s" % (str(self.next_pool_id),) + self.next_pool_id += 1 + self.create_pool( + name, + pg_num, + erasure_code_profile_name=erasure_code_profile_name, + min_size=min_size, + erasure_code_use_overwrites=erasure_code_use_overwrites) + return name + + @contextlib.contextmanager + def pool(self, pool_name, pg_num=16, erasure_code_profile_name=None): + self.create_pool(pool_name, pg_num, erasure_code_profile_name) + yield + self.remove_pool(pool_name) + + def create_pool(self, pool_name, pg_num=16, + erasure_code_profile_name=None, + min_size=None, + erasure_code_use_overwrites=False): + """ + Create a pool named from the pool_name parameter. + :param pool_name: name of the pool being created. + :param pg_num: initial number of pgs. 
+ :param erasure_code_profile_name: if set and !None create an + erasure coded pool using the profile + :param erasure_code_use_overwrites: if true, allow overwrites + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(pg_num, int) + assert pool_name not in self.pools + self.log("creating pool_name %s" % (pool_name,)) + if erasure_code_profile_name: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num), str(pg_num), + 'erasure', erasure_code_profile_name) + else: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num)) + if min_size is not None: + self.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, + 'min_size', + str(min_size)) + if erasure_code_use_overwrites: + self.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, + 'allow_ec_overwrites', + 'true') + self.raw_cluster_cmd( + 'osd', 'pool', 'application', 'enable', + pool_name, 'rados', '--yes-i-really-mean-it', + run.Raw('||'), 'true') + self.pools[pool_name] = pg_num + time.sleep(1) + + def add_pool_snap(self, pool_name, snap_name): + """ + Add pool snapshot + :param pool_name: name of pool to snapshot + :param snap_name: name of snapshot to take + """ + self.raw_cluster_cmd('osd', 'pool', 'mksnap', + str(pool_name), str(snap_name)) + + def remove_pool_snap(self, pool_name, snap_name): + """ + Remove pool snapshot + :param pool_name: name of pool to snapshot + :param snap_name: name of snapshot to remove + """ + self.raw_cluster_cmd('osd', 'pool', 'rmsnap', + str(pool_name), str(snap_name)) + + def remove_pool(self, pool_name): + """ + Remove the indicated pool + :param pool_name: Pool to be removed + """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + self.log("removing pool_name %s" % (pool_name,)) + del self.pools[pool_name] + self.raw_cluster_cmd('osd', 'pool', 'rm', pool_name, pool_name, + "--yes-i-really-really-mean-it") + + def get_pool(self): + """ + Pick a random pool + """ + with self.lock: + if self.pools: + return random.sample(self.pools.keys(), 1)[0] + + def get_pool_pg_num(self, pool_name): + """ + Return the number of pgs in the pool specified. + """ + with self.lock: + assert isinstance(pool_name, str) + if pool_name in self.pools: + return self.pools[pool_name] + return 0 + + def get_pool_property(self, pool_name, prop): + """ + :param pool_name: pool + :param prop: property to be checked. + :returns: property as string + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + output = self.raw_cluster_cmd( + 'osd', + 'pool', + 'get', + pool_name, + prop) + return output.split()[1] + + def get_pool_int_property(self, pool_name, prop): + return int(self.get_pool_property(pool_name, prop)) + + def set_pool_property(self, pool_name, prop, val): + """ + :param pool_name: pool + :param prop: property to be set. + :param val: value to set. + + This routine retries if set operation fails. 
+ """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + assert isinstance(val, int) + tries = 0 + while True: + r = self.raw_cluster_cmd_result( + 'osd', + 'pool', + 'set', + pool_name, + prop, + str(val)) + if r != 11: # EAGAIN + break + tries += 1 + if tries > 50: + raise Exception('timed out getting EAGAIN ' + 'when setting pool property %s %s = %s' % + (pool_name, prop, val)) + self.log('got EAGAIN setting pool property, ' + 'waiting a few seconds...') + time.sleep(2) + + def expand_pool(self, pool_name, by, max_pgs): + """ + Increase the number of pgs in a pool + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(by, int) + assert pool_name in self.pools + if self.get_num_creating() > 0: + return False + if (self.pools[pool_name] + by) > max_pgs: + return False + self.log("increase pool size by %d" % (by,)) + new_pg_num = self.pools[pool_name] + by + self.set_pool_property(pool_name, "pg_num", new_pg_num) + self.pools[pool_name] = new_pg_num + return True + + def contract_pool(self, pool_name, by, min_pgs): + """ + Decrease the number of pgs in a pool + """ + with self.lock: + self.log('contract_pool %s by %s min %s' % ( + pool_name, str(by), str(min_pgs))) + assert isinstance(pool_name, str) + assert isinstance(by, int) + assert pool_name in self.pools + if self.get_num_creating() > 0: + self.log('too many creating') + return False + proj = self.pools[pool_name] - by + if proj < min_pgs: + self.log('would drop below min_pgs, proj %d, currently %d' % (proj,self.pools[pool_name],)) + return False + self.log("decrease pool size by %d" % (by,)) + new_pg_num = self.pools[pool_name] - by + self.set_pool_property(pool_name, "pg_num", new_pg_num) + self.pools[pool_name] = new_pg_num + return True + + def stop_pg_num_changes(self): + """ + Reset all pg_num_targets back to pg_num, canceling splits and merges + """ + self.log('Canceling any pending splits or merges...') + osd_dump = self.get_osd_dump_json() + try: + for pool in osd_dump['pools']: + if pool['pg_num'] != pool['pg_num_target']: + self.log('Setting pool %s (%d) pg_num %d -> %d' % + (pool['pool_name'], pool['pool'], + pool['pg_num_target'], + pool['pg_num'])) + self.raw_cluster_cmd('osd', 'pool', 'set', pool['pool_name'], + 'pg_num', str(pool['pg_num'])) + except KeyError: + # we don't support pg_num_target before nautilus + pass + + def set_pool_pgpnum(self, pool_name, force): + """ + Set pgpnum property of pool_name pool. 
+ """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + if not force and self.get_num_creating() > 0: + return False + self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name]) + return True + + def list_pg_unfound(self, pgid): + """ + return list of unfound pgs with the id specified + """ + r = None + offset = {} + while True: + out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_unfound', + json.dumps(offset)) + j = json.loads(out) + if r is None: + r = j + else: + r['objects'].extend(j['objects']) + if not 'more' in j: + break + if j['more'] == 0: + break + offset = j['objects'][-1]['oid'] + if 'more' in r: + del r['more'] + return r + + def get_pg_stats(self): + """ + Dump the cluster and get pg stats + """ + out = self.raw_cluster_cmd('pg', 'dump', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + try: + return j['pg_map']['pg_stats'] + except KeyError: + return j['pg_stats'] + + def get_osd_df(self, osdid): + """ + Get the osd df stats + """ + out = self.raw_cluster_cmd('osd', 'df', 'name', 'osd.{}'.format(osdid), + '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + return j['nodes'][0] + + def get_pool_df(self, name): + """ + Get the pool df stats + """ + out = self.raw_cluster_cmd('df', 'detail', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + return next((p['stats'] for p in j['pools'] if p['name'] == name), + None) + + def get_pgids_to_force(self, backfill): + """ + Return the randomized list of PGs that can have their recovery/backfill forced + """ + j = self.get_pg_stats(); + pgids = [] + if backfill: + wanted = ['degraded', 'backfilling', 'backfill_wait'] + else: + wanted = ['recovering', 'degraded', 'recovery_wait'] + for pg in j: + status = pg['state'].split('+') + for t in wanted: + if random.random() > 0.5 and not ('forced_backfill' in status or 'forced_recovery' in status) and t in status: + pgids.append(pg['pgid']) + break + return pgids + + def get_pgids_to_cancel_force(self, backfill): + """ + Return the randomized list of PGs whose recovery/backfill priority is forced + """ + j = self.get_pg_stats(); + pgids = [] + if backfill: + wanted = 'forced_backfill' + else: + wanted = 'forced_recovery' + for pg in j: + status = pg['state'].split('+') + if wanted in status and random.random() > 0.5: + pgids.append(pg['pgid']) + return pgids + + def compile_pg_status(self): + """ + Return a histogram of pg state values + """ + ret = {} + j = self.get_pg_stats() + for pg in j: + for status in pg['state'].split('+'): + if status not in ret: + ret[status] = 0 + ret[status] += 1 + return ret + + @wait_for_pg_stats # type: ignore + def with_pg_state(self, pool, pgnum, check): + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + assert(check(stats['state'])) + + @wait_for_pg_stats # type: ignore + def with_pg(self, pool, pgnum, check): + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + return check(stats) + + def get_last_scrub_stamp(self, pool, pgnum): + """ + Get the timestamp of the last scrub. 
+ """ + stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum)) + return stats["last_scrub_stamp"] + + def do_pg_scrub(self, pool, pgnum, stype): + """ + Scrub pg and wait for scrubbing to finish + """ + init = self.get_last_scrub_stamp(pool, pgnum) + RESEND_TIMEOUT = 120 # Must be a multiple of SLEEP_TIME + FATAL_TIMEOUT = RESEND_TIMEOUT * 3 + SLEEP_TIME = 10 + timer = 0 + while init == self.get_last_scrub_stamp(pool, pgnum): + assert timer < FATAL_TIMEOUT, "fatal timeout trying to " + stype + self.log("waiting for scrub type %s" % (stype,)) + if (timer % RESEND_TIMEOUT) == 0: + self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum)) + # The first time in this loop is the actual request + if timer != 0 and stype == "repair": + self.log("WARNING: Resubmitted a non-idempotent repair") + time.sleep(SLEEP_TIME) + timer += SLEEP_TIME + + def wait_snap_trimming_complete(self, pool): + """ + Wait for snap trimming on pool to end + """ + POLL_PERIOD = 10 + FATAL_TIMEOUT = 600 + start = time.time() + poolnum = self.get_pool_num(pool) + poolnumstr = "%s." % (poolnum,) + while (True): + now = time.time() + if (now - start) > FATAL_TIMEOUT: + assert (now - start) < FATAL_TIMEOUT, \ + 'failed to complete snap trimming before timeout' + all_stats = self.get_pg_stats() + trimming = False + for pg in all_stats: + if (poolnumstr in pg['pgid']) and ('snaptrim' in pg['state']): + self.log("pg {pg} in trimming, state: {state}".format( + pg=pg['pgid'], + state=pg['state'])) + trimming = True + if not trimming: + break + self.log("{pool} still trimming, waiting".format(pool=pool)) + time.sleep(POLL_PERIOD) + + def get_single_pg_stats(self, pgid): + """ + Return pg for the pgid specified. + """ + all_stats = self.get_pg_stats() + + for pg in all_stats: + if pg['pgid'] == pgid: + return pg + + return None + + def get_object_pg_with_shard(self, pool, name, osdid): + """ + """ + pool_dump = self.get_pool_dump(pool) + object_map = self.get_object_map(pool, name) + if pool_dump["type"] == PoolType.ERASURE_CODED: + shard = object_map['acting'].index(osdid) + return "{pgid}s{shard}".format(pgid=object_map['pgid'], + shard=shard) + else: + return object_map['pgid'] + + def get_object_primary(self, pool, name): + """ + """ + object_map = self.get_object_map(pool, name) + return object_map['acting_primary'] + + def get_object_map(self, pool, name): + """ + osd map --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('--format=json', 'osd', 'map', pool, name) + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump_json(self): + """ + osd dump --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('osd', 'dump', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump(self): + """ + Dump osds + :returns: all osds + """ + return self.get_osd_dump_json()['osds'] + + def get_osd_metadata(self): + """ + osd metadata --format=json converted to a python object + :returns: the python object containing osd metadata information + """ + out = self.raw_cluster_cmd('osd', 'metadata', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_mgr_dump(self): + out = self.raw_cluster_cmd('mgr', 'dump', '--format=json') + return json.loads(out) + + def get_stuck_pgs(self, type_, threshold): + """ + :returns: stuck pg information from the cluster + """ + out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold), + '--format=json') + return 
json.loads(out).get('stuck_pg_stats',[]) + + def get_num_unfound_objects(self): + """ + Check cluster status to get the number of unfound objects + """ + status = self.raw_cluster_status() + self.log(status) + return status['pgmap'].get('unfound_objects', 0) + + def get_num_creating(self): + """ + Find the number of pgs in creating mode. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if 'creating' in pg['state']: + num += 1 + return num + + def get_num_active_clean(self): + """ + Find the number of active and clean pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active_clean(pgs) + + def _get_num_active_clean(self, pgs): + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + pg['state'].count('clean') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_num_active_recovered(self): + """ + Find the number of active and recovered pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active_recovered(pgs) + + def _get_num_active_recovered(self, pgs): + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + not pg['state'].count('recover') and + not pg['state'].count('backfilling') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_is_making_recovery_progress(self): + """ + Return whether there is recovery progress discernable in the + raw cluster status + """ + status = self.raw_cluster_status() + kps = status['pgmap'].get('recovering_keys_per_sec', 0) + bps = status['pgmap'].get('recovering_bytes_per_sec', 0) + ops = status['pgmap'].get('recovering_objects_per_sec', 0) + return kps > 0 or bps > 0 or ops > 0 + + def get_num_active(self): + """ + Find the number of active pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active(pgs) + + def _get_num_active(self, pgs): + num = 0 + for pg in pgs: + if pg['state'].count('active') and not pg['state'].count('stale'): + num += 1 + return num + + def get_num_down(self): + """ + Find the number of pgs that are down. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if ((pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def get_num_active_down(self): + """ + Find the number of pgs that are either active or down. 
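[The counters above all classify a PG by substring matches on its `state` field; a compact sketch of the same convention, assuming `pgs` is the list returned by `get_pg_stats()`.]

    def summarize_pg_states(pgs):
        # mirrors the active/clean/stale substring convention used above
        def has(pg, token):
            return pg['state'].count(token) > 0
        return {
            'active+clean': sum(1 for pg in pgs
                                if has(pg, 'active') and has(pg, 'clean')
                                and not has(pg, 'stale')),
            'down_or_incomplete': sum(1 for pg in pgs
                                      if (has(pg, 'down') or has(pg, 'incomplete'))
                                      and not has(pg, 'stale')),
        }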
+ """ + pgs = self.get_pg_stats() + return self._get_num_active_down(pgs) + + def _get_num_active_down(self, pgs): + num = 0 + for pg in pgs: + if ((pg['state'].count('active') and not + pg['state'].count('stale')) or + (pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def get_num_peered(self): + """ + Find the number of PGs that are peered + """ + pgs = self.get_pg_stats() + return self._get_num_peered(pgs) + + def _get_num_peered(self, pgs): + num = 0 + for pg in pgs: + if pg['state'].count('peered') and not pg['state'].count('stale'): + num += 1 + return num + + def is_clean(self): + """ + True if all pgs are clean + """ + pgs = self.get_pg_stats() + if self._get_num_active_clean(pgs) == len(pgs): + return True + else: + self.dump_pgs_not_active_clean() + return False + + def is_recovered(self): + """ + True if all pgs have recovered + """ + pgs = self.get_pg_stats() + return self._get_num_active_recovered(pgs) == len(pgs) + + def is_active_or_down(self): + """ + True if all pgs are active or down + """ + pgs = self.get_pg_stats() + return self._get_num_active_down(pgs) == len(pgs) + + def dump_pgs_not_active_clean(self): + """ + Dumps all pgs that are not active+clean + """ + pgs = self.get_pg_stats() + for pg in pgs: + if pg['state'] != 'active+clean': + self.log('PG %s is not active+clean' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active_down(self): + """ + Dumps all pgs that are not active or down + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state'] and 'down' not in pg['state']: + self.log('PG %s is not active or down' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active(self): + """ + Dumps all pgs that are not active + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state']: + self.log('PG %s is not active' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active_peered(self, pgs): + for pg in pgs: + if (not pg['state'].count('active')) and (not pg['state'].count('peered')): + self.log('PG %s is not active or peered' % pg['pgid']) + self.log(pg) + + def wait_for_clean(self, timeout=1200): + """ + Returns true when all pgs are clean. + """ + self.log("waiting for clean") + start = time.time() + num_active_clean = self.get_num_active_clean() + while not self.is_clean(): + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if time.time() - start >= timeout: + self.log('dumping pgs not clean') + self.dump_pgs_not_active_clean() + assert time.time() - start < timeout, \ + 'wait_for_clean: failed before timeout expired' + cur_active_clean = self.get_num_active_clean() + if cur_active_clean != num_active_clean: + start = time.time() + num_active_clean = cur_active_clean + time.sleep(3) + self.log("clean!") + + def are_all_osds_up(self): + """ + Returns true if all osds are up. + """ + x = self.get_osd_dump() + return (len(x) == sum([(y['up'] > 0) for y in x])) + + def wait_for_all_osds_up(self, timeout=None): + """ + When this exits, either the timeout has expired, or all + osds are up. 
+ """ + self.log("waiting for all up") + start = time.time() + while not self.are_all_osds_up(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_all_osds_up' + time.sleep(3) + self.log("all up!") + + def pool_exists(self, pool): + if pool in self.list_pools(): + return True + return False + + def wait_for_pool(self, pool, timeout=300): + """ + Wait for a pool to exist + """ + self.log('waiting for pool %s to exist' % pool) + start = time.time() + while not self.pool_exists(pool): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_pool' + time.sleep(3) + + def wait_for_pools(self, pools): + for pool in pools: + self.wait_for_pool(pool) + + def is_mgr_available(self): + x = self.get_mgr_dump() + return x.get('available', False) + + def wait_for_mgr_available(self, timeout=None): + self.log("waiting for mgr available") + start = time.time() + while not self.is_mgr_available(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_mgr_available' + time.sleep(3) + self.log("mgr available!") + + def wait_for_recovery(self, timeout=None): + """ + Check peering. When this exists, we have recovered. + """ + self.log("waiting for recovery to complete") + start = time.time() + num_active_recovered = self.get_num_active_recovered() + while not self.is_recovered(): + now = time.time() + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if now - start >= timeout: + if self.is_recovered(): + break + self.log('dumping pgs not recovered yet') + self.dump_pgs_not_active_clean() + assert now - start < timeout, \ + 'wait_for_recovery: failed before timeout expired' + cur_active_recovered = self.get_num_active_recovered() + if cur_active_recovered != num_active_recovered: + start = time.time() + num_active_recovered = cur_active_recovered + time.sleep(3) + self.log("recovered!") + + def wait_for_active(self, timeout=None): + """ + Check peering. When this exists, we are definitely active + """ + self.log("waiting for peering to complete") + start = time.time() + num_active = self.get_num_active() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active') + self.dump_pgs_not_active() + assert time.time() - start < timeout, \ + 'wait_for_active: failed before timeout expired' + cur_active = self.get_num_active() + if cur_active != num_active: + start = time.time() + num_active = cur_active + time.sleep(3) + self.log("active!") + + def wait_for_active_or_down(self, timeout=None): + """ + Check peering. 
When this exists, we are definitely either + active or down + """ + self.log("waiting for peering to complete or become blocked") + start = time.time() + num_active_down = self.get_num_active_down() + while not self.is_active_or_down(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active or down') + self.dump_pgs_not_active_down() + assert time.time() - start < timeout, \ + 'wait_for_active_or_down: failed before timeout expired' + cur_active_down = self.get_num_active_down() + if cur_active_down != num_active_down: + start = time.time() + num_active_down = cur_active_down + time.sleep(3) + self.log("active or down!") + + def osd_is_up(self, osd): + """ + Wrapper for osd check + """ + osds = self.get_osd_dump() + return osds[osd]['up'] > 0 + + def wait_till_osd_is_up(self, osd, timeout=None): + """ + Loop waiting for osd. + """ + self.log('waiting for osd.%d to be up' % osd) + start = time.time() + while not self.osd_is_up(osd): + if timeout is not None: + assert time.time() - start < timeout, \ + 'osd.%d failed to come up before timeout expired' % osd + time.sleep(3) + self.log('osd.%d is up' % osd) + + def is_active(self): + """ + Wrapper to check if all pgs are active + """ + return self.get_num_active() == self.get_num_pgs() + + def all_active_or_peered(self): + """ + Wrapper to check if all PGs are active or peered + """ + pgs = self.get_pg_stats() + if self._get_num_active(pgs) + self._get_num_peered(pgs) == len(pgs): + return True + else: + self.dump_pgs_not_active_peered(pgs) + return False + + def wait_till_active(self, timeout=None): + """ + Wait until all pgs are active. + """ + self.log("waiting till active") + start = time.time() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active') + self.dump_pgs_not_active() + assert time.time() - start < timeout, \ + 'wait_till_active: failed before timeout expired' + time.sleep(3) + self.log("active!") + + def wait_till_pg_convergence(self, timeout=None): + start = time.time() + old_stats = None + active_osds = [osd['osd'] for osd in self.get_osd_dump() + if osd['in'] and osd['up']] + while True: + # strictly speaking, no need to wait for mon. but due to the + # "ms inject socket failures" setting, the osdmap could be delayed, + # so mgr is likely to ignore the pg-stat messages with pgs serving + # newly created pools which is not yet known by mgr. so, to make sure + # the mgr is updated with the latest pg-stats, waiting for mon/mgr is + # necessary. + self.flush_pg_stats(active_osds) + new_stats = dict((stat['pgid'], stat['state']) + for stat in self.get_pg_stats()) + if old_stats == new_stats: + return old_stats + if timeout is not None: + assert time.time() - start < timeout, \ + 'failed to reach convergence before %d secs' % timeout + old_stats = new_stats + # longer than mgr_stats_period + time.sleep(5 + 1) + + def mark_out_osd(self, osd): + """ + Wrapper to mark osd out. + """ + self.raw_cluster_cmd('osd', 'out', str(osd)) + + def kill_osd(self, osd): + """ + Kill osds by either power cycling (if indicated by the config) + or by stopping. 
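[A round-trip sketch of the OSD membership helpers, assuming `manager` is the task's CephManager; `mark_in_osd()` is defined a little further down, and a real thrasher would randomize the osd id rather than use the fixed one shown here.]

    def out_in_osd_sketch(manager, osd=0):
        manager.mark_out_osd(osd)            # data rebalances away from the OSD
        manager.wait_for_clean(timeout=1200)
        manager.mark_in_osd(osd)             # and back again
        manager.wait_for_clean(timeout=1200)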
+ """ + if self.config.get('powercycle'): + remote = self.find_remote('osd', osd) + self.log('kill_osd on osd.{o} ' + 'doing powercycle of {s}'.format(o=osd, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + elif self.config.get('bdev_inject_crash') and self.config.get('bdev_inject_crash_probability'): + if random.uniform(0, 1) < self.config.get('bdev_inject_crash_probability', .5): + self.inject_args( + 'osd', osd, + 'bdev-inject-crash', self.config.get('bdev_inject_crash')) + try: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).wait() + except: + pass + else: + raise RuntimeError('osd.%s did not fail' % osd) + else: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + else: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + + @staticmethod + def _assert_ipmi(remote): + assert remote.console.has_ipmi_credentials, ( + "powercycling requested but RemoteConsole is not " + "initialized. Check ipmi config.") + + def blackhole_kill_osd(self, osd): + """ + Stop osd if nothing else works. + """ + self.inject_args('osd', osd, + 'objectstore-blackhole', True) + time.sleep(2) + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + + def revive_osd(self, osd, timeout=360, skip_admin_check=False): + """ + Revive osds by either power cycling (if indicated by the config) + or by restarting. + """ + if self.config.get('powercycle'): + remote = self.find_remote('osd', osd) + self.log('kill_osd on osd.{o} doing powercycle of {s}'. + format(o=osd, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + if not remote.console.check_status(300): + raise Exception('Failed to revive osd.{o} via ipmi'. + format(o=osd)) + teuthology.reconnect(self.ctx, 60, [remote]) + mount_osd_data(self.ctx, remote, self.cluster, str(osd)) + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('osd', osd, self.cluster).reset() + self.ctx.daemons.get_daemon('osd', osd, self.cluster).restart() + + if not skip_admin_check: + # wait for dump_ops_in_flight; this command doesn't appear + # until after the signal handler is installed and it is safe + # to stop the osd again without making valgrind leak checks + # unhappy. see #5924. + self.wait_run_admin_socket('osd', osd, + args=['dump_ops_in_flight'], + timeout=timeout, stdout=DEVNULL) + + def mark_down_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'down', str(osd)) + + def mark_in_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'in', str(osd)) + + def signal_osd(self, osd, sig, silent=False): + """ + Wrapper to local get_daemon call which sends the given + signal to the given osd. + """ + self.ctx.daemons.get_daemon('osd', osd, + self.cluster).signal(sig, silent=silent) + + ## monitors + def signal_mon(self, mon, sig, silent=False): + """ + Wrapper to local get_daemon call + """ + self.ctx.daemons.get_daemon('mon', mon, + self.cluster).signal(sig, silent=silent) + + def kill_mon(self, mon): + """ + Kill the monitor by either power cycling (if the config says so), + or by doing a stop. + """ + if self.config.get('powercycle'): + remote = self.find_remote('mon', mon) + self.log('kill_mon on mon.{m} doing powercycle of {s}'. + format(m=mon, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mon', mon, self.cluster).stop() + + def revive_mon(self, mon): + """ + Restart by either power cycling (if the config says so), + or by doing a normal restart. 
+ """ + if self.config.get('powercycle'): + remote = self.find_remote('mon', mon) + self.log('revive_mon on mon.{m} doing powercycle of {s}'. + format(m=mon, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('mon', mon, self.cluster).restart() + + def revive_mgr(self, mgr): + """ + Restart by either power cycling (if the config says so), + or by doing a normal restart. + """ + if self.config.get('powercycle'): + remote = self.find_remote('mgr', mgr) + self.log('revive_mgr on mgr.{m} doing powercycle of {s}'. + format(m=mgr, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('mgr', mgr, self.cluster).restart() + + def get_mon_status(self, mon): + """ + Extract all the monitor status information from the cluster + """ + out = self.raw_cluster_cmd('tell', 'mon.%s' % mon, 'mon_status') + return json.loads(out) + + def get_mon_quorum(self): + """ + Extract monitor quorum information from the cluster + """ + out = self.raw_cluster_cmd('quorum_status') + j = json.loads(out) + return j['quorum'] + + def wait_for_mon_quorum_size(self, size, timeout=300): + """ + Loop until quorum size is reached. + """ + self.log('waiting for quorum size %d' % size) + sleep = 3 + with safe_while(sleep=sleep, + tries=timeout // sleep, + action=f'wait for quorum size {size}') as proceed: + while proceed(): + try: + if len(self.get_mon_quorum()) == size: + break + except CommandFailedError as e: + # could fail instea4d of blocked if the rotating key of the + # connected monitor is not updated yet after they form the + # quorum + if e.exitstatus == errno.EACCES: + pass + else: + raise + self.log("quorum is size %d" % size) + + def get_mon_health(self, debug=False): + """ + Extract all the monitor health information. + """ + out = self.raw_cluster_cmd('health', '--format=json') + if debug: + self.log('health:\n{h}'.format(h=out)) + return json.loads(out) + + def wait_until_healthy(self, timeout=None): + self.log("wait_until_healthy") + start = time.time() + while self.get_mon_health()['status'] != 'HEALTH_OK': + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_until_healthy' + time.sleep(3) + self.log("wait_until_healthy done") + + def get_filepath(self): + """ + Return path to osd data with {id} needing to be replaced + """ + return '/var/lib/ceph/osd/' + self.cluster + '-{id}' + + def make_admin_daemon_dir(self, remote): + """ + Create /var/run/ceph directory on remote site. + + :param ctx: Context + :param remote: Remote site + """ + remote.run(args=['sudo', + 'install', '-d', '-m0777', '--', '/var/run/ceph', ], ) + + def get_service_task_status(self, service, status_key): + """ + Return daemon task status for a given ceph service. + + :param service: ceph service (mds, osd, etc...) 
+ :param status_key: matching task status key + """ + task_status = {} + status = self.raw_cluster_status() + try: + for k,v in status['servicemap']['services'][service]['daemons'].items(): + ts = dict(v).get('task_status', None) + if ts: + task_status[k] = ts[status_key] + except KeyError: # catches missing service and status key + return {} + self.log(task_status) + return task_status + +def utility_task(name): + """ + Generate ceph_manager subtask corresponding to ceph_manager + method name + """ + def task(ctx, config): + if config is None: + config = {} + args = config.get('args', []) + kwargs = config.get('kwargs', {}) + cluster = config.get('cluster', 'ceph') + fn = getattr(ctx.managers[cluster], name) + fn(*args, **kwargs) + return task + +revive_osd = utility_task("revive_osd") +revive_mon = utility_task("revive_mon") +kill_osd = utility_task("kill_osd") +kill_mon = utility_task("kill_mon") +create_pool = utility_task("create_pool") +remove_pool = utility_task("remove_pool") +wait_for_clean = utility_task("wait_for_clean") +flush_all_pg_stats = utility_task("flush_all_pg_stats") +set_pool_property = utility_task("set_pool_property") +do_pg_scrub = utility_task("do_pg_scrub") +wait_for_pool = utility_task("wait_for_pool") +wait_for_pools = utility_task("wait_for_pools") diff --git a/qa/tasks/ceph_objectstore_tool.py b/qa/tasks/ceph_objectstore_tool.py new file mode 100644 index 000000000..9c29d80b2 --- /dev/null +++ b/qa/tasks/ceph_objectstore_tool.py @@ -0,0 +1,662 @@ +""" +ceph_objectstore_tool - Simple test of ceph-objectstore-tool utility +""" +from io import BytesIO + +import contextlib +import json +import logging +import os +import sys +import tempfile +import time +from tasks import ceph_manager +from tasks.util.rados import (rados, create_replicated_pool, create_ec_pool) +from teuthology import misc as teuthology +from teuthology.orchestra import run + +from teuthology.exceptions import CommandFailedError + +# from util.rados import (rados, create_ec_pool, +# create_replicated_pool, +# create_cache_pool) + +log = logging.getLogger(__name__) + +# Should get cluster name "ceph" from somewhere +# and normal path from osd_data and osd_journal in conf +FSPATH = "/var/lib/ceph/osd/ceph-{id}" +JPATH = "/var/lib/ceph/osd/ceph-{id}/journal" + + +def cod_setup_local_data(log, ctx, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT): + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + LOCALNAME = os.path.join(DATADIR, NAME) + + dataline = range(DATALINECOUNT) + fd = open(LOCALNAME, "w") + data = "This is the data for " + NAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + +def cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT): + + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + DDNAME = os.path.join(DATADIR, NAME) + + remote.run(args=['rm', '-f', DDNAME]) + + dataline = range(DATALINECOUNT) + data = "This is the data for " + NAME + "\n" + DATA = "" + for _ in dataline: + DATA += data + remote.write_file(DDNAME, DATA) + + +def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT, POOL, db, ec): + ERRORS = 0 + log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS)) + + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + DDNAME = os.path.join(DATADIR, NAME) + + proc = rados(ctx, remote, ['-p', POOL, 'put', NAME, DDNAME], + wait=False) + # proc = 
remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME]) + ret = proc.wait() + if ret != 0: + log.critical("Rados put failed with status {ret}". + format(ret=proc.exitstatus)) + sys.exit(1) + + db[NAME] = {} + + keys = range(i) + db[NAME]["xattr"] = {} + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + proc = remote.run(args=['rados', '-p', POOL, 'setxattr', + NAME, mykey, myval]) + ret = proc.wait() + if ret != 0: + log.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[NAME]["xattr"][mykey] = myval + + # Erasure coded pools don't support omap + if ec: + continue + + # Create omap header in all objects but REPobject1 + if i != 1: + myhdr = "hdr{i}".format(i=i) + proc = remote.run(args=['rados', '-p', POOL, 'setomapheader', + NAME, myhdr]) + ret = proc.wait() + if ret != 0: + log.critical("setomapheader failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[NAME]["omapheader"] = myhdr + + db[NAME]["omap"] = {} + for k in keys: + if k == 0: + continue + mykey = "okey{i}-{k}".format(i=i, k=k) + myval = "oval{i}-{k}".format(i=i, k=k) + proc = remote.run(args=['rados', '-p', POOL, 'setomapval', + NAME, mykey, myval]) + ret = proc.wait() + if ret != 0: + log.critical("setomapval failed with {ret}".format(ret=ret)) + db[NAME]["omap"][mykey] = myval + + return ERRORS + + +def get_lines(filename): + tmpfd = open(filename, "r") + line = True + lines = [] + while line: + line = tmpfd.readline().rstrip('\n') + if line: + lines += [line] + tmpfd.close() + os.unlink(filename) + return lines + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run ceph_objectstore_tool test + + The config should be as follows:: + + ceph_objectstore_tool: + objects: 20 # <number of objects> + pgnum: 12 + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'ceph_objectstore_tool task only accepts a dict for configuration' + + log.info('Beginning ceph_objectstore_tool...') + + log.debug(config) + log.debug(ctx) + clients = ctx.cluster.only(teuthology.is_type('client')) + assert len(clients.remotes) > 0, 'Must specify at least 1 client' + (cli_remote, _) = clients.remotes.popitem() + log.debug(cli_remote) + + # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys())) + # client = clients.popitem() + # log.info(client) + osds = ctx.cluster.only(teuthology.is_type('osd')) + log.info("OSDS") + log.info(osds) + log.info(osds.remotes) + + manager = ctx.managers['ceph'] + while (len(manager.get_osd_status()['up']) != + len(manager.get_osd_status()['raw'])): + time.sleep(10) + while (len(manager.get_osd_status()['in']) != + len(manager.get_osd_status()['up'])): + time.sleep(10) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + + PGNUM = config.get('pgnum', 12) + log.info("pgnum: {num}".format(num=PGNUM)) + + ERRORS = 0 + + REP_POOL = "rep_pool" + REP_NAME = "REPobject" + create_replicated_pool(cli_remote, REP_POOL, PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME) + + EC_POOL = "ec_pool" + EC_NAME = "ECobject" + create_ec_pool(cli_remote, EC_POOL, 'default', PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, + EC_POOL, EC_NAME, ec=True) + + if ERRORS == 0: + log.info("TEST PASSED") + else: + log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) + + assert ERRORS == 0 + + try: + yield + finally: + log.info('Ending ceph_objectstore_tool') + + +def test_objectstore(ctx, config, 
cli_remote, REP_POOL, REP_NAME, ec=False): + manager = ctx.managers['ceph'] + + osds = ctx.cluster.only(teuthology.is_type('osd')) + + TEUTHDIR = teuthology.get_testdir(ctx) + DATADIR = os.path.join(TEUTHDIR, "ceph.data") + DATALINECOUNT = 10000 + ERRORS = 0 + NUM_OBJECTS = config.get('objects', 10) + log.info("objects: {num}".format(num=NUM_OBJECTS)) + + pool_dump = manager.get_pool_dump(REP_POOL) + REPID = pool_dump['pool'] + + log.debug("repid={num}".format(num=REPID)) + + db = {} + + LOCALDIR = tempfile.mkdtemp("cod") + + cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, + REP_NAME, DATALINECOUNT) + allremote = [] + allremote.append(cli_remote) + allremote += list(osds.remotes.keys()) + allremote = list(set(allremote)) + for remote in allremote: + cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT) + + ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT, REP_POOL, db, ec) + + pgs = {} + for stats in manager.get_pg_stats(): + if stats["pgid"].find(str(REPID) + ".") != 0: + continue + if pool_dump["type"] == ceph_manager.PoolType.REPLICATED: + for osd in stats["acting"]: + pgs.setdefault(osd, []).append(stats["pgid"]) + elif pool_dump["type"] == ceph_manager.PoolType.ERASURE_CODED: + shard = 0 + for osd in stats["acting"]: + pgs.setdefault(osd, []).append("{pgid}s{shard}". + format(pgid=stats["pgid"], + shard=shard)) + shard += 1 + else: + raise Exception("{pool} has an unexpected type {type}". + format(pool=REP_POOL, type=pool_dump["type"])) + + log.info(pgs) + log.info(db) + + for osd in manager.get_osd_status()['up']: + manager.kill_osd(osd) + time.sleep(5) + + pgswithobjects = set() + objsinpg = {} + + # Test --op list and generate json for all objects + log.info("Test --op list by generating json for all objects") + prefix = ("sudo ceph-objectstore-tool " + "--data-path {fpath} " + "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH) + for remote in osds.remotes.keys(): + log.debug(remote) + log.debug(osds.remotes[remote]) + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + log.info("process osd.{id} on {remote}". + format(id=osdid, remote=remote)) + cmd = (prefix + "--op list").format(id=osdid) + try: + lines = remote.sh(cmd, check_status=False).splitlines() + for pgline in lines: + if not pgline: + continue + (pg, obj) = json.loads(pgline) + name = obj['oid'] + if name in db: + pgswithobjects.add(pg) + objsinpg.setdefault(pg, []).append(name) + db[name].setdefault("pg2json", + {})[pg] = json.dumps(obj) + except CommandFailedError as e: + log.error("Bad exit status {ret} from --op list request". + format(ret=e.exitstatus)) + ERRORS += 1 + + log.info(db) + log.info(pgswithobjects) + log.info(objsinpg) + + if pool_dump["type"] == ceph_manager.PoolType.REPLICATED: + # Test get-bytes + log.info("Test get-bytes and set-bytes") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].items(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-bytes {fname}". 
+ format(fname=GETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + if proc.exitstatus != 0: + remote.run(args="rm -f {getfile}". + format(getfile=GETNAME).split()) + log.error("Bad exit status {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + cmd = ("diff -q {file} {getfile}". + format(file=file, getfile=GETNAME)) + proc = remote.run(args=cmd.split()) + if proc.exitstatus != 0: + log.error("Data from get-bytes differ") + # log.debug("Got:") + # cat_file(logging.DEBUG, GETNAME) + # log.debug("Expected:") + # cat_file(logging.DEBUG, file) + ERRORS += 1 + remote.run(args="rm -f {getfile}". + format(getfile=GETNAME).split()) + + data = ("put-bytes going into {file}\n". + format(file=file)) + remote.write_file(SETNAME, data) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=SETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". + format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += "get-bytes -".split() + try: + output = remote.sh(cmd, wait=True) + if data != output: + log.error("Data inconsistent after " + "set-bytes, got:") + log.error(output) + ERRORS += 1 + except CommandFailedError as e: + log.error("get-bytes after " + "set-bytes ret={ret}". + format(ret=e.exitstatus)) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=file).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". + format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + log.info("Test list-attrs get-attr") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].items(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ["list-attrs"] + try: + keys = remote.sh(cmd, wait=True, stderr=BytesIO()).split() + except CommandFailedError as e: + log.error("Bad exit status {ret}". + format(ret=e.exitstatus)) + ERRORS += 1 + continue + values = dict(db[basename]["xattr"]) + + for key in keys: + if (key == "_" or + key == "snapset" or + key == "hinfo_key"): + continue + key = key.strip("_") + if key not in values: + log.error("The key {key} should be present". + format(key=key)) + ERRORS += 1 + continue + exp = values.pop(key) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-attr {key}". + format(key="_" + key).split()) + try: + val = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("get-attr failed with {ret}". 
+ format(ret=e.exitstatus)) + ERRORS += 1 + continue + if exp != val: + log.error("For key {key} got value {got} " + "instead of {expected}". + format(key=key, got=val, + expected=exp)) + ERRORS += 1 + if "hinfo_key" in keys: + cmd_prefix = prefix.format(id=osdid) + cmd = """ + expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64) + echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder + echo $expected | base64 --decode | \ + {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected + """.format(prefix=cmd_prefix, pg=pg, json=JSON, + key="hinfo_key") + log.debug(cmd) + proc = remote.run(args=['bash', '-e', '-x', + '-c', cmd], + check_status=False, + stdout=BytesIO(), + stderr=BytesIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("failed with " + + str(proc.exitstatus)) + log.error(" ".join([ + proc.stdout.getvalue().decode(), + proc.stderr.getvalue().decode(), + ])) + ERRORS += 1 + + if len(values) != 0: + log.error("Not all keys found, remaining keys:") + log.error(values) + + log.info("Test pg info") + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op info --pgid {pg}"). + format(id=osdid, pg=pg).split()) + try: + info = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Failure of --op info command with %s", + e.exitstatus) + ERRORS += 1 + continue + if not str(pg) in info: + log.error("Bad data from info: %s", info) + ERRORS += 1 + + log.info("Test pg logging") + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op log --pgid {pg}"). + format(id=osdid, pg=pg).split()) + try: + output = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Getting log failed for pg {pg} " + "from osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + ERRORS += 1 + continue + HASOBJ = pg in pgswithobjects + MODOBJ = "modify" in output + if HASOBJ != MODOBJ: + log.error("Bad log for pg {pg} from osd.{id}". + format(pg=pg, id=osdid)) + MSG = (HASOBJ and [""] or ["NOT "])[0] + log.error("Log should {msg}have a modify entry". + format(msg=MSG)) + ERRORS += 1 + + log.info("Test pg export") + EXP_ERRORS = 0 + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op export --pgid {pg} --file {file}"). + format(id=osdid, pg=pg, file=fpath)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Exporting failed for pg {pg} " + "on osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + log.info("Test pg removal") + RM_ERRORS = 0 + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--force --op remove --pgid {pg}"). 
+ format(pg=pg, id=osdid)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Removing failed for pg {pg} " + "on osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + RM_ERRORS += 1 + + ERRORS += RM_ERRORS + + IMP_ERRORS = 0 + if EXP_ERRORS == 0 and RM_ERRORS == 0: + log.info("Test pg import") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op import --file {file}"). + format(id=osdid, file=fpath)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Import failed from {file} with {ret}". + format(file=fpath, ret=e.exitstatus)) + IMP_ERRORS += 1 + else: + log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") + + ERRORS += IMP_ERRORS + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + log.info("Restarting OSDs....") + # They are still look to be up because of setting nodown + for osd in manager.get_osd_status()['up']: + manager.revive_osd(osd) + # Wait for health? + time.sleep(5) + # Let scrub after test runs verify consistency of all copies + log.info("Verify replicated import data") + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + TESTNAME = os.path.join(DATADIR, "gettest") + REFNAME = os.path.join(DATADIR, NAME) + + proc = rados(ctx, cli_remote, + ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False) + + ret = proc.wait() + if ret != 0: + log.error("After import, rados get failed with {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + + cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME, + ref=REFNAME) + proc = cli_remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.error("Data comparison failed for {obj}".format(obj=NAME)) + ERRORS += 1 + + return ERRORS diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py new file mode 100644 index 000000000..3f8a152d7 --- /dev/null +++ b/qa/tasks/ceph_test_case.py @@ -0,0 +1,224 @@ +from typing import Optional, TYPE_CHECKING +import unittest +import time +import logging + +from teuthology.exceptions import CommandFailedError + +if TYPE_CHECKING: + from tasks.mgr.mgr_test_case import MgrCluster + +log = logging.getLogger(__name__) + +class TestTimeoutError(RuntimeError): + pass + +class CephTestCase(unittest.TestCase): + """ + For test tasks that want to define a structured set of + tests implemented in python. Subclass this with appropriate + helpers for the subsystem you're testing. + """ + + # Environment references + mounts = None + fs = None + recovery_fs = None + backup_fs = None + ceph_cluster = None + mds_cluster = None + mgr_cluster: Optional['MgrCluster'] = None + ctx = None + + mon_manager = None + + # Declarative test requirements: subclasses should override these to indicate + # their special needs. If not met, tests will be skipped. 
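[A minimal, invented subclass showing how the declarative requirement attribute declared just below is meant to be overridden; the class name, test name and config option are the editor's illustration, not part of this change.]

    class TestNeedsMemstore(CephTestCase):
        # setUp() skips the whole test unless osd_objectstore == memstore
        REQUIRE_MEMSTORE = True

        def test_something_small(self):
            # tracked in _mon_configs_set and cleared again by tearDown()
            self.config_set('osd', 'osd_scrub_sleep', 0)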
+ REQUIRE_MEMSTORE = False + + def setUp(self): + self._mon_configs_set = set() + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "Starting test {0}".format(self.id())) + + if self.REQUIRE_MEMSTORE: + objectstore = self.ceph_cluster.get_config("osd_objectstore", "osd") + if objectstore != "memstore": + # You certainly *could* run this on a real OSD, but you don't want to sit + # here for hours waiting for the test to fill up a 1TB drive! + raise self.skipTest("Require `memstore` OSD backend (test " \ + "would take too long on full sized OSDs") + + def tearDown(self): + self.config_clear() + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "Ended test {0}".format(self.id())) + + def config_clear(self): + for section, key in self._mon_configs_set: + self.config_rm(section, key) + self._mon_configs_set.clear() + + def _fix_key(self, key): + return str(key).replace(' ', '_') + + def config_get(self, section, key): + key = self._fix_key(key) + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "get", section, key).strip() + + def config_show(self, entity, key): + key = self._fix_key(key) + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "show", entity, key).strip() + + def config_minimal(self): + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "generate-minimal-conf").strip() + + def config_rm(self, section, key): + key = self._fix_key(key) + self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "rm", section, key) + # simplification: skip removing from _mon_configs_set; + # let tearDown clear everything again + + def config_set(self, section, key, value): + key = self._fix_key(key) + self._mon_configs_set.add((section, key)) + self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "set", section, key, str(value)) + + def cluster_cmd(self, command: str): + assert self.ceph_cluster is not None + return self.ceph_cluster.mon_manager.raw_cluster_cmd(*(command.split(" "))) + + + def assert_cluster_log(self, expected_pattern, invert_match=False, + timeout=10, watch_channel=None, present=True): + """ + Context manager. Assert that during execution, or up to 5 seconds later, + the Ceph cluster log emits a message matching the expected pattern. + + :param expected_pattern: A string that you expect to see in the log output + :type expected_pattern: str + :param watch_channel: Specifies the channel to be watched. This can be + 'cluster', 'audit', ... + :type watch_channel: str + :param present: Assert the log entry is present (default: True) or not (False). 
+ :type present: bool + """ + + ceph_manager = self.ceph_cluster.mon_manager + + class ContextManager(object): + def match(self): + found = expected_pattern in self.watcher_process.stdout.getvalue() + if invert_match: + return not found + + return found + + def __enter__(self): + self.watcher_process = ceph_manager.run_ceph_w(watch_channel) + + def __exit__(self, exc_type, exc_val, exc_tb): + fail = False + if not self.watcher_process.finished: + # Check if we got an early match, wait a bit if we didn't + if present and self.match(): + return + elif not present and self.match(): + fail = True + else: + log.debug("No log hits yet, waiting...") + # Default monc tick interval is 10s, so wait that long and + # then some grace + time.sleep(5 + timeout) + + self.watcher_process.stdin.close() + try: + self.watcher_process.wait() + except CommandFailedError: + pass + + if present and not self.match(): + log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n") + raise AssertionError(f"Expected log message found: '{expected_pattern}'") + elif fail or (not present and self.match()): + log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n") + raise AssertionError(f"Unexpected log message found: '{expected_pattern}'") + + return ContextManager() + + def wait_for_health(self, pattern, timeout): + """ + Wait until 'ceph health' contains messages matching the pattern + """ + def seen_health_warning(): + health = self.ceph_cluster.mon_manager.get_mon_health() + codes = [s for s in health['checks']] + summary_strings = [s[1]['summary']['message'] for s in health['checks'].items()] + if len(summary_strings) == 0: + log.debug("Not expected number of summary strings ({0})".format(summary_strings)) + return False + else: + for ss in summary_strings: + if pattern in ss: + return True + if pattern in codes: + return True + + log.debug("Not found expected summary strings yet ({0})".format(summary_strings)) + return False + + log.info(f"waiting {timeout}s for health warning matching {pattern}") + self.wait_until_true(seen_health_warning, timeout) + + def wait_for_health_clear(self, timeout): + """ + Wait until `ceph health` returns no messages + """ + def is_clear(): + health = self.ceph_cluster.mon_manager.get_mon_health() + return len(health['checks']) == 0 + + self.wait_until_true(is_clear, timeout) + + def wait_until_equal(self, get_fn, expect_val, timeout, reject_fn=None, period=5): + elapsed = 0 + while True: + val = get_fn() + if val == expect_val: + return + elif reject_fn and reject_fn(val): + raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val)) + else: + if elapsed >= timeout: + raise TestTimeoutError("Timed out after {0} seconds waiting for {1} (currently {2})".format( + elapsed, expect_val, val + )) + else: + log.debug("wait_until_equal: {0} != {1}, waiting (timeout={2})...".format(val, expect_val, timeout)) + time.sleep(period) + elapsed += period + + log.debug("wait_until_equal: success") + + @classmethod + def wait_until_true(cls, condition, timeout, check_fn=None, period=5): + elapsed = 0 + retry_count = 0 + while True: + if condition(): + log.debug("wait_until_true: success in {0}s and {1} retries".format(elapsed, retry_count)) + return + else: + if elapsed >= timeout: + if check_fn and check_fn() and retry_count < 5: + elapsed = 0 + retry_count += 1 + log.debug("wait_until_true: making progress, waiting (timeout={0} retry_count={1})...".format(timeout, retry_count)) + else: + raise TestTimeoutError("Timed out after {0}s and {1} 
retries".format(elapsed, retry_count)) + else: + log.debug("wait_until_true: waiting (timeout={0} retry_count={1})...".format(timeout, retry_count)) + time.sleep(period) + elapsed += period diff --git a/qa/tasks/cephadm.conf b/qa/tasks/cephadm.conf new file mode 100644 index 000000000..9ec08a346 --- /dev/null +++ b/qa/tasks/cephadm.conf @@ -0,0 +1,91 @@ +[global] +# make logging friendly to teuthology +log_to_file = true +log_to_stderr = false +log to journald = false +mon cluster log file level = debug + +mon clock drift allowed = 1.000 + +# replicate across OSDs, not hosts +osd crush chooseleaf type = 0 +#osd pool default size = 2 +osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd" + +# enable some debugging +auth debug = true +ms die on old message = true +ms die on bug = true +debug asserts on shutdown = true + +# adjust warnings +mon max pg per osd = 10000 # >= luminous +mon pg warn max object skew = 0 +mon osd allow primary affinity = true +mon osd allow pg remap = true +mon warn on legacy crush tunables = false +mon warn on crush straw calc version zero = false +mon warn on no sortbitwise = false +mon warn on osd down out interval zero = false +mon warn on too few osds = false +mon_warn_on_pool_pg_num_not_power_of_two = false + +# disable pg_autoscaler by default for new pools +osd_pool_default_pg_autoscale_mode = off + +# tests delete pools +mon allow pool delete = true + +[osd] +osd scrub load threshold = 5.0 +osd scrub max interval = 600 +osd mclock profile = high_recovery_ops + +osd recover clone overlap = true +osd recovery max chunk = 1048576 + +osd deep scrub update digest min age = 30 + +osd map max advance = 10 + +osd memory target autotune = true + +# debugging +osd debug shutdown = true +osd debug op order = true +osd debug verify stray on activate = true +osd debug pg log writeout = true +osd debug verify cached snaps = true +osd debug verify missing on start = true +osd debug misdirected ops = true +osd op queue = debug_random +osd op queue cut off = debug_random +osd shutdown pgref assert = true +bdev debug aio = true +osd sloppy crc = true + +[mgr] +mon reweight min pgs per osd = 4 +mon reweight min bytes per osd = 10 +mgr/telemetry/nag = false + +[mon] +mon data avail warn = 5 +mon mgr mkfs grace = 240 +mon reweight min pgs per osd = 4 +mon osd reporter subtree level = osd +mon osd prime pg temp = true +mon reweight min bytes per osd = 10 + +# rotate auth tickets quickly to exercise renewal paths +auth mon ticket ttl = 660 # 11m +auth service ticket ttl = 240 # 4m + +# don't complain about global id reclaim +mon_warn_on_insecure_global_id_reclaim = false +mon_warn_on_insecure_global_id_reclaim_allowed = false + +[client.rgw] +rgw cache enabled = true +rgw enable ops log = true +rgw enable usage log = true diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py new file mode 100644 index 000000000..e9fc25d6a --- /dev/null +++ b/qa/tasks/cephadm.py @@ -0,0 +1,1754 @@ +""" +Ceph cluster task, deployed via cephadm orchestrator +""" +import argparse +import configobj +import contextlib +import logging +import os +import json +import re +import uuid +import yaml + +from copy import deepcopy +from io import BytesIO, StringIO +from tarfile import ReadError +from tasks.ceph_manager import CephManager +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology import packaging +from teuthology.orchestra import run +from teuthology.orchestra.daemon import DaemonGroup +from 
teuthology.config import config as teuth_config +from textwrap import dedent +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from tasks.util import chacra + +# these items we use from ceph.py should probably eventually move elsewhere +from tasks.ceph import get_mons, healthy +from tasks.vip import subst_vip + +CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus'] + +log = logging.getLogger(__name__) + + +def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs): + teuthology.get_testdir(ctx) + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + '-c', '/etc/ceph/{}.conf'.format(cluster_name), + '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + '--fsid', ctx.ceph[cluster_name].fsid, + ] + extra_cephadm_args + [ + '--', + ] + args, + **kwargs + ) + + +def build_initial_config(ctx, config): + cluster_name = config['cluster'] + + path = os.path.join(os.path.dirname(__file__), 'cephadm.conf') + conf = configobj.ConfigObj(path, file_error=True) + + conf.setdefault('global', {}) + conf['global']['fsid'] = ctx.ceph[cluster_name].fsid + + # overrides + for section, keys in config.get('conf',{}).items(): + for key, value in keys.items(): + log.info(" override: [%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + return conf + + +def distribute_iscsi_gateway_cfg(ctx, conf_data): + """ + Distribute common gateway config to get the IPs. + These will help in iscsi clients with finding trusted_ip_list. + """ + log.info('Distributing iscsi-gateway.cfg...') + for remote, roles in ctx.cluster.remotes.items(): + remote.write_file( + path='/etc/ceph/iscsi-gateway.cfg', + data=conf_data, + sudo=True) + +def update_archive_setting(ctx, key, value): + """ + Add logs directory to job's info log file + """ + if ctx.archive is None: + return + with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file: + info_yaml = yaml.safe_load(info_file) + info_file.seek(0) + if 'archive' in info_yaml: + info_yaml['archive'][key] = value + else: + info_yaml['archive'] = {key: value} + yaml.safe_dump(info_yaml, info_file, default_flow_style=False) + + +@contextlib.contextmanager +def normalize_hostnames(ctx): + """ + Ensure we have short hostnames throughout, for consistency between + remote.shortname and socket.gethostname() in cephadm. + """ + log.info('Normalizing hostnames...') + cluster = ctx.cluster.filter(lambda r: '.' 
in r.hostname) + cluster.run(args=[ + 'sudo', + 'hostname', + run.Raw('$(hostname -s)'), + ]) + + try: + yield + finally: + pass + + +@contextlib.contextmanager +def download_cephadm(ctx, config, ref): + cluster_name = config['cluster'] + + if config.get('cephadm_mode') != 'cephadm-package': + if ctx.config.get('redhat'): + _fetch_cephadm_from_rpm(ctx) + # TODO: come up with a sensible way to detect if we need an "old, uncompiled" + # cephadm + elif 'cephadm_git_url' in config and 'cephadm_branch' in config: + _fetch_cephadm_from_github(ctx, config, ref) + else: + _fetch_cephadm_from_chachra(ctx, config, cluster_name) + + try: + yield + finally: + _rm_cluster(ctx, cluster_name) + if config.get('cephadm_mode') == 'root': + _rm_cephadm(ctx) + + +def _fetch_cephadm_from_rpm(ctx): + log.info("Copying cephadm installed from an RPM package") + # cephadm already installed from redhat.install task + ctx.cluster.run( + args=[ + 'cp', + run.Raw('$(which cephadm)'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ] + ) + + +def _fetch_cephadm_from_github(ctx, config, ref): + ref = config.get('cephadm_branch', ref) + git_url = config.get('cephadm_git_url', teuth_config.get_ceph_git_url()) + log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref)) + if git_url.startswith('https://github.com/'): + # git archive doesn't like https:// URLs, which we use with github. + rest = git_url.split('https://github.com/', 1)[1] + rest = re.sub(r'\.git/?$', '', rest).strip() # no .git suffix + ctx.cluster.run( + args=[ + 'curl', '--silent', + 'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm', + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ], + ) + else: + ctx.cluster.run( + args=[ + 'git', 'clone', git_url, 'testrepo', + run.Raw('&&'), + 'cd', 'testrepo', + run.Raw('&&'), + 'git', 'show', f'{ref}:src/cephadm/cephadm', + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', ctx.cephadm, + ], + ) + # sanity-check the resulting file and set executable bit + cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) + ctx.cluster.run( + args=[ + 'test', '-s', ctx.cephadm, + run.Raw('&&'), + 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), + run.Raw('&&'), + 'chmod', '+x', ctx.cephadm, + ], + ) + + +def _fetch_cephadm_from_chachra(ctx, config, cluster_name): + log.info('Downloading "compiled" cephadm from cachra') + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + bp = packaging.get_builder_project()( + config.get('project', 'ceph'), + config, + ctx=ctx, + remote=bootstrap_remote, + ) + log.info('builder_project result: %s' % (bp._result.json())) + + flavor = config.get('flavor', 'default') + branch = config.get('branch') + sha1 = config.get('sha1') + + # pull the cephadm binary from chacra + url = chacra.get_binary_url( + 'cephadm', + project=bp.project, + distro=bp.distro.split('/')[0], + release=bp.distro.split('/')[1], + arch=bp.arch, + flavor=flavor, + branch=branch, + sha1=sha1, + ) + log.info("Discovered cachra url: %s", url) + ctx.cluster.run( + args=[ + 'curl', '--silent', '-L', url, + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ], + ) + + # sanity-check the resulting file and set executable bit + cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) + ctx.cluster.run( + args=[ + 'test', '-s', ctx.cephadm, + run.Raw('&&'), + 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), + run.Raw('&&'), + 'chmod', '+x', ctx.cephadm, + ], + ) + + +def 
_rm_cluster(ctx, cluster_name): + log.info('Removing cluster...') + ctx.cluster.run(args=[ + 'sudo', + ctx.cephadm, + 'rm-cluster', + '--fsid', ctx.ceph[cluster_name].fsid, + '--force', + ]) + + +def _rm_cephadm(ctx): + log.info('Removing cephadm ...') + ctx.cluster.run( + args=[ + 'rm', + '-rf', + ctx.cephadm, + ], + ) + + +@contextlib.contextmanager +def ceph_log(ctx, config): + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + update_archive_setting(ctx, 'log', '/var/log/ceph') + + + try: + yield + + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + + finally: + log.info('Checking cluster log for badness...') + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. + :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + '/var/log/ceph/{fsid}/ceph.log'.format( + fsid=fsid), + ] + if excludes: + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + r = ctx.ceph[cluster_name].bootstrap_remote.run( + stdout=StringIO(), + args=args, + ) + stdout = r.stdout.getvalue() + if stdout != '': + return stdout + return None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config.get('log-ignorelist')) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log-ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', # all logs, not just for the cluster + '/var/log/rbd-target-api', # ceph-iscsi + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, '/var/log/ceph', # everything + os.path.join(sub, 'log')) + except ReadError: + pass + + +@contextlib.contextmanager +def ceph_crash(ctx, config): + """ + Gather crash dumps from /var/lib/ceph/$fsid/crash + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash') + + try: + yield + + finally: + if ctx.archive is not None: + log.info('Archiving crash dumps...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, + '/var/lib/ceph/%s/crash' % fsid, + os.path.join(sub, 'crash')) + except ReadError: + pass + + 
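The badness check in ceph_log() above shells out to an egrep pipeline on the bootstrap remote and then re-runs it once per severity marker to choose a failure reason. The snippet below is only a rough local sketch of that logic in plain Python, assuming a log path and ignorelist supplied by the caller rather than the remote /var/log/ceph/<fsid>/ceph.log pipeline the task actually runs:

    import re

    # severity markers, most severe first, mirroring the order used above
    SEVERITIES = [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']

    def first_match(lines, pattern, excludes=()):
        """Return the first line matching `pattern` and none of `excludes`."""
        for line in lines:
            if re.search(pattern, line) and \
                    not any(re.search(x, line) for x in excludes):
                return line
        return None

    def cluster_log_failure_reason(path, ignorelist=()):
        """Return a failure reason if any ERR/WRN/SEC line survives the
        ignorelist, picking the most severe surviving line; None if clean."""
        with open(path) as f:
            lines = f.readlines()
        if first_match(lines, r'\[ERR\]|\[WRN\]|\[SEC\]', ignorelist) is None:
            return None  # log is clean
        for sev in SEVERITIES:
            hit = first_match(lines, sev, ignorelist)
            if hit is not None:
                return '"%s" in cluster log' % hit.rstrip('\n')
        return None

    # illustrative call (hypothetical path and ignorelist entry):
    # cluster_log_failure_reason('/tmp/ceph.log', ignorelist=[r'slow request'])
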
+@contextlib.contextmanager +def pull_image(ctx, config): + cluster_name = config['cluster'] + log.info(f'Pulling image {ctx.ceph[cluster_name].image} on all hosts...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'pull', + ], + wait=False, + ) + ) + + try: + yield + finally: + pass + +@contextlib.contextmanager +def setup_ca_signed_keys(ctx, config): + # generate our ca key + cluster_name = config['cluster'] + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + bootstrap_remote.run(args=[ + 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/ca-key', '-N', '' + ]) + + # not using read_file here because it runs dd as a non-root + # user and would hit permission issues + r = bootstrap_remote.run(args=[ + 'sudo', 'cat', '/root/ca-key.pub' + ], stdout=StringIO()) + ca_key_pub_contents = r.stdout.getvalue() + + # make CA key accepted on each host + for remote in ctx.cluster.remotes.keys(): + # write key to each host's /etc/ssh dir + remote.run(args=[ + 'sudo', 'echo', ca_key_pub_contents, + run.Raw('|'), + 'sudo', 'tee', '-a', '/etc/ssh/ca-key.pub', + ]) + # make sshd accept the CA signed key + remote.run(args=[ + 'sudo', 'echo', 'TrustedUserCAKeys /etc/ssh/ca-key.pub', + run.Raw('|'), + 'sudo', 'tee', '-a', '/etc/ssh/sshd_config', + run.Raw('&&'), + 'sudo', 'systemctl', 'restart', 'sshd', + ]) + + # generate a new key pair and sign the pub key to make a cert + bootstrap_remote.run(args=[ + 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/cephadm-ssh-key', '-N', '', + run.Raw('&&'), + 'sudo', 'ssh-keygen', '-s', '/root/ca-key', '-I', 'user_root', '-n', 'root', '-V', '+52w', '/root/cephadm-ssh-key', + ]) + + # for debugging, to make sure this setup has worked as intended + for remote in ctx.cluster.remotes.keys(): + remote.run(args=[ + 'sudo', 'cat', '/etc/ssh/ca-key.pub' + ]) + remote.run(args=[ + 'sudo', 'cat', '/etc/ssh/sshd_config', + run.Raw('|'), + 'grep', 'TrustedUserCAKeys' + ]) + bootstrap_remote.run(args=[ + 'sudo', 'ls', '/root/' + ]) + + ctx.ca_signed_key_info = {} + ctx.ca_signed_key_info['ca-key'] = '/root/ca-key' + ctx.ca_signed_key_info['ca-key-pub'] = '/root/ca-key.pub' + ctx.ca_signed_key_info['private-key'] = '/root/cephadm-ssh-key' + ctx.ca_signed_key_info['ca-signed-cert'] = '/root/cephadm-ssh-key-cert.pub' + + try: + yield + finally: + pass + +@contextlib.contextmanager +def ceph_bootstrap(ctx, config): + """ + Bootstrap ceph cluster. 
+ + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + fsid = ctx.ceph[cluster_name].fsid + + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + first_mon = ctx.ceph[cluster_name].first_mon + first_mon_role = ctx.ceph[cluster_name].first_mon_role + mons = ctx.ceph[cluster_name].mons + + ctx.cluster.run(args=[ + 'sudo', 'mkdir', '-p', '/etc/ceph', + ]); + ctx.cluster.run(args=[ + 'sudo', 'chmod', '777', '/etc/ceph', + ]); + try: + # write seed config + log.info('Writing seed config...') + conf_fp = BytesIO() + seed_config = build_initial_config(ctx, config) + seed_config.write(conf_fp) + bootstrap_remote.write_file( + path='{}/seed.{}.conf'.format(testdir, cluster_name), + data=conf_fp.getvalue()) + log.debug('Final config:\n' + conf_fp.getvalue().decode()) + ctx.ceph[cluster_name].conf = seed_config + + # register initial daemons + ctx.daemons.register_daemon( + bootstrap_remote, 'mon', first_mon, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mon.' + first_mon), + wait=False, + started=True, + ) + if not ctx.ceph[cluster_name].roleless: + first_mgr = ctx.ceph[cluster_name].first_mgr + ctx.daemons.register_daemon( + bootstrap_remote, 'mgr', first_mgr, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mgr.' + first_mgr), + wait=False, + started=True, + ) + + # bootstrap + log.info('Bootstrapping...') + cmd = [ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + '-v', + 'bootstrap', + '--fsid', fsid, + '--config', '{}/seed.{}.conf'.format(testdir, cluster_name), + '--output-config', '/etc/ceph/{}.conf'.format(cluster_name), + '--output-keyring', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ] + + if not config.get("use-ca-signed-key", False): + cmd += ['--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name)] + else: + # ctx.ca_signed_key_info should have been set up in + # setup_ca_signed_keys function which we expect to have + # run before bootstrap if use-ca-signed-key is true + signed_key_info = ctx.ca_signed_key_info + cmd += [ + "--ssh-private-key", signed_key_info['private-key'], + "--ssh-signed-cert", signed_key_info['ca-signed-cert'], + ] + + if config.get("no_cgroups_split") is True: + cmd.insert(cmd.index("bootstrap"), "--no-cgroups-split") + + if config.get('registry-login'): + registry = config['registry-login'] + cmd += [ + "--registry-url", registry['url'], + "--registry-username", registry['username'], + "--registry-password", registry['password'], + ] + + if not ctx.ceph[cluster_name].roleless: + cmd += [ + '--mon-id', first_mon, + '--mgr-id', first_mgr, + '--orphan-initial-daemons', # we will do it explicitly! 
+ '--skip-monitoring-stack', # we'll provision these explicitly + ] + + if mons[first_mon_role].startswith('['): + cmd += ['--mon-addrv', mons[first_mon_role]] + else: + cmd += ['--mon-ip', mons[first_mon_role]] + if config.get('skip_dashboard'): + cmd += ['--skip-dashboard'] + if config.get('skip_monitoring_stack'): + cmd += ['--skip-monitoring-stack'] + if config.get('single_host_defaults'): + cmd += ['--single-host-defaults'] + if not config.get('avoid_pacific_features', False): + cmd += ['--skip-admin-label'] + # bootstrap makes the keyring root 0600, so +r it for our purposes + cmd += [ + run.Raw('&&'), + 'sudo', 'chmod', '+r', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ] + bootstrap_remote.run(args=cmd) + + # fetch keys and configs + log.info('Fetching config...') + ctx.ceph[cluster_name].config_file = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.conf') + log.info('Fetching client.admin keyring...') + ctx.ceph[cluster_name].admin_keyring = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.client.admin.keyring') + log.info('Fetching mon keyring...') + ctx.ceph[cluster_name].mon_keyring = \ + bootstrap_remote.read_file(f'/var/lib/ceph/{fsid}/mon.{first_mon}/keyring', sudo=True) + + if not config.get("use-ca-signed-key", False): + # fetch ssh key, distribute to additional nodes + log.info('Fetching pub ssh key...') + ssh_pub_key = bootstrap_remote.read_file( + f'{testdir}/{cluster_name}.pub').decode('ascii').strip() + + log.info('Installing pub ssh key for root users...') + ctx.cluster.run(args=[ + 'sudo', 'install', '-d', '-m', '0700', '/root/.ssh', + run.Raw('&&'), + 'echo', ssh_pub_key, + run.Raw('|'), + 'sudo', 'tee', '-a', '/root/.ssh/authorized_keys', + run.Raw('&&'), + 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys', + ]) + + # set options + if config.get('allow_ptrace', True): + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true']) + + if not config.get('avoid_pacific_features', False): + log.info('Distributing conf and client.admin keyring to all hosts + 0755') + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'orch', 'client-keyring', 'set', 'client.admin', + '*', '--mode', '0755'], + check_status=False) + + # add other hosts + for remote in ctx.cluster.remotes.keys(): + if remote == bootstrap_remote: + continue + + # note: this may be redundant (see above), but it avoids + # us having to wait for cephadm to do it. + log.info('Writing (initial) conf and keyring to %s' % remote.shortname) + remote.write_file( + path='/etc/ceph/{}.conf'.format(cluster_name), + data=ctx.ceph[cluster_name].config_file) + remote.write_file( + path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + data=ctx.ceph[cluster_name].admin_keyring) + + log.info('Adding host %s to orchestrator...' % remote.shortname) + _shell(ctx, cluster_name, bootstrap_remote, [ + 'ceph', 'orch', 'host', 'add', + remote.shortname + ]) + r = _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'orch', 'host', 'ls', '--format=json'], + stdout=StringIO()) + hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())] + assert remote.shortname in hosts + + yield + + finally: + log.info('Cleaning up testdir ceph.* files...') + ctx.cluster.run(args=[ + 'rm', '-f', + '{}/seed.{}.conf'.format(testdir, cluster_name), + '{}/{}.pub'.format(testdir, cluster_name), + ]) + + log.info('Stopping all daemons...') + + # this doesn't block until they are all stopped... 
+ #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) + + # stop the daemons we know + for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True): + cluster, type_, id_ = teuthology.split_role(role) + try: + ctx.daemons.get_daemon(type_, id_, cluster).stop() + except Exception: + log.exception(f'Failed to stop "{role}"') + raise + + # tear down anything left (but leave the logs behind) + ctx.cluster.run( + args=[ + 'sudo', + ctx.cephadm, + 'rm-cluster', + '--fsid', fsid, + '--force', + '--keep-logs', + ], + check_status=False, # may fail if upgrading from old cephadm + ) + + # clean up /etc/ceph + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + '/etc/ceph/{}.conf'.format(cluster_name), + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ]) + + +@contextlib.contextmanager +def ceph_mons(ctx, config): + """ + Deploy any additional mons + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + daemons = {} + if config.get('add_mons_via_daemon_add'): + # This is the old way of adding mons that works with the (early) octopus + # cephadm scheduler. + num_mons = 1 + for remote, roles in ctx.cluster.remotes.items(): + for mon in [r for r in roles + if teuthology.is_type('mon', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mon) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: + continue + log.info('Adding %s on %s' % (mon, remote.shortname)) + num_mons += 1 + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'daemon', 'add', 'mon', + remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_, + ]) + ctx.daemons.register_daemon( + remote, 'mon', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mon), + wait=False, + started=True, + ) + daemons[mon] = (remote, id_) + + with contextutil.safe_while(sleep=1, tries=180) as proceed: + while proceed(): + log.info('Waiting for %d mons in monmap...' % (num_mons)) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'mon', 'dump', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + if len(j['mons']) == num_mons: + break + else: + nodes = [] + for remote, roles in ctx.cluster.remotes.items(): + for mon in [r for r in roles + if teuthology.is_type('mon', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mon) + log.info('Adding %s on %s' % (mon, remote.shortname)) + nodes.append(remote.shortname + + ':' + ctx.ceph[cluster_name].mons[mon] + + '=' + id_) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: + continue + daemons[mon] = (remote, id_) + + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mon', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for mgr, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mon', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mon), + wait=False, + started=True, + ) + + with contextutil.safe_while(sleep=1, tries=180) as proceed: + while proceed(): + log.info('Waiting for %d mons in monmap...' 
% (len(nodes))) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'mon', 'dump', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + if len(j['mons']) == len(nodes): + break + + # refresh our (final) ceph.conf file + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + log.info('Generating final ceph.conf file...') + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=bootstrap_remote, + args=[ + 'ceph', 'config', 'generate-minimal-conf', + ], + stdout=StringIO(), + ) + ctx.ceph[cluster_name].config_file = r.stdout.getvalue() + + yield + + finally: + pass + + +@contextlib.contextmanager +def ceph_mgrs(ctx, config): + """ + Deploy any additional mgrs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for mgr in [r for r in roles + if teuthology.is_type('mgr', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mgr) + log.info('Adding %s on %s' % (mgr, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr: + continue + daemons[mgr] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mgr', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for mgr, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mgr', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mgr), + wait=False, + started=True, + ) + + yield + + finally: + pass + + +@contextlib.contextmanager +def ceph_osds(ctx, config): + """ + Deploy OSDs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + log.info('Deploying OSDs...') + + # provision OSDs in numeric order + id_to_remote = {} + devs_by_remote = {} + for remote, roles in ctx.cluster.remotes.items(): + devs_by_remote[remote] = teuthology.get_scratch_devices(remote) + for osd in [r for r in roles + if teuthology.is_type('osd', cluster_name)(r)]: + _, _, id_ = teuthology.split_role(osd) + id_to_remote[int(id_)] = (osd, remote) + + cur = 0 + for osd_id in sorted(id_to_remote.keys()): + osd, remote = id_to_remote[osd_id] + _, _, id_ = teuthology.split_role(osd) + assert int(id_) == cur + devs = devs_by_remote[remote] + assert devs ## FIXME ## + dev = devs.pop() + if all(_ in dev for _ in ('lv', 'vg')): + short_dev = dev.replace('/dev/', '') + else: + short_dev = dev + log.info('Deploying %s on %s with %s...' 
% ( + osd, remote.shortname, dev)) + _shell(ctx, cluster_name, remote, [ + 'ceph-volume', 'lvm', 'zap', dev]) + add_osd_args = ['ceph', 'orch', 'daemon', 'add', 'osd', + remote.shortname + ':' + short_dev] + osd_method = config.get('osd_method') + if osd_method: + add_osd_args.append(osd_method) + _shell(ctx, cluster_name, remote, add_osd_args) + ctx.daemons.register_daemon( + remote, 'osd', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(osd), + wait=False, + started=True, + ) + cur += 1 + + if cur == 0: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'osd', '--all-available-devices', + ]) + # expect the number of scratch devs + num_osds = sum(map(len, devs_by_remote.values())) + assert num_osds + else: + # expect the number of OSDs we created + num_osds = cur + + log.info(f'Waiting for {num_osds} OSDs to come up...') + with contextutil.safe_while(sleep=1, tries=120) as proceed: + while proceed(): + p = _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'osd', 'stat', '-f', 'json'], stdout=StringIO()) + j = json.loads(p.stdout.getvalue()) + if int(j.get('num_up_osds', 0)) == num_osds: + break; + + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[cluster_name] = CephManager( + ctx.ceph[cluster_name].bootstrap_remote, + ctx=ctx, + logger=log.getChild('ceph_manager.' + cluster_name), + cluster=cluster_name, + cephadm=True, + ) + + yield + finally: + pass + + +@contextlib.contextmanager +def ceph_mdss(ctx, config): + """ + Deploy MDSss + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('mds', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mds', + 'all', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mds', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + +@contextlib.contextmanager +def cephfs_setup(ctx, config): + mdss = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + + # If there are any MDSs, then create a filesystem for them to use + # Do this last because requires mon cluster to be up and running + if len(mdss) > 0: + log.info('Setting up CephFS filesystem(s)...') + cephfs_config = config.get('cephfs', {}) + fs_configs = cephfs_config.pop('fs', [{'name': 'cephfs'}]) + set_allow_multifs = len(fs_configs) > 1 + + # wait for standbys to become available (slow due to valgrind, perhaps) + mdsc = MDSCluster(ctx) + with contextutil.safe_while(sleep=2,tries=150) as proceed: + while proceed(): + if len(mdsc.get_standby_daemons()) >= len(mdss): + break + + fss = [] + for fs_config in fs_configs: + assert isinstance(fs_config, dict) + name = fs_config.pop('name') + temp = deepcopy(cephfs_config) + teuthology.deep_merge(temp, fs_config) + subvols = config.get('subvols', None) + if subvols: + teuthology.deep_merge(temp, {'subvols': subvols}) + fs = Filesystem(ctx, fs_config=temp, name=name, create=True) + if set_allow_multifs: + fs.set_allow_multifs() + set_allow_multifs = False + fss.append(fs) + + yield + + for fs in fss: + fs.destroy() + else: + yield + 
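The service deployers here (mgr, mds, and the monitoring stack below) all hand `ceph orch apply` the same placement argument: a daemon count, a ';', then ';'-joined host=id entries, with the mon variant additionally embedding the mon address as host:addr=id. A minimal sketch of that string construction, using a simplified '<type>.<id>' role format in place of teuthology.split_role:

    def build_placement(remotes_and_roles, daemon_type):
        """Build the '<count>;host1=id1;host2=id2' placement string passed
        to `ceph orch apply <daemon_type>`.

        remotes_and_roles: [(shortname, ['<type>.<id>', ...]), ...] -- a
        simplification of teuthology's remote/role bookkeeping."""
        nodes = []
        for shortname, roles in remotes_and_roles:
            for role in roles:
                type_, _, id_ = role.partition('.')
                if type_ == daemon_type:
                    nodes.append('%s=%s' % (shortname, id_))
        return '%d;%s' % (len(nodes), ';'.join(nodes))

    # example with hypothetical hosts:
    #   build_placement([('smithi001', ['mon.a', 'mgr.x']),
    #                    ('smithi002', ['mon.b', 'mgr.y'])], 'mgr')
    #   -> '2;smithi001=x;smithi002=y'
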
+@contextlib.contextmanager +def ceph_monitoring(daemon_type, ctx, config): + """ + Deploy prometheus, node-exporter, etc. + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type(daemon_type, cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', daemon_type, + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, daemon_type, id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_rgw(ctx, config): + """ + Deploy rgw + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = {} + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('rgw', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + svc = '.'.join(id_.split('.')[0:2]) + if svc not in nodes: + nodes[svc] = [] + nodes[svc].append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + + for svc, nodes in nodes.items(): + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'rgw', svc, + '--placement', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'rgw', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_iscsi(ctx, config): + """ + Deploy iSCSIs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + ips = [] + + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('iscsi', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + ips.append(remote.ip_address) + trusted_ip_list = ','.join(ips) + if nodes: + poolname = 'datapool' + # ceph osd pool create datapool 3 3 replicated + _shell(ctx, cluster_name, remote, [ + 'ceph', 'osd', 'pool', 'create', + poolname, '3', '3', 'replicated'] + ) + + _shell(ctx, cluster_name, remote, [ + 'rbd', 'pool', 'init', poolname] + ) + + # ceph orch apply iscsi datapool (admin)user (admin)password + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'iscsi', + poolname, 'admin', 'admin', + '--trusted_ip_list', trusted_ip_list, + '--placement', str(len(nodes)) + ';' + ';'.join(nodes)] + ) + + # used by iscsi client to identify valid gateway ip's + conf_data = dedent(f""" + [config] + trusted_ip_list = {trusted_ip_list} + """) + distribute_iscsi_gateway_cfg(ctx, conf_data) + + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'iscsi', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_clients(ctx, config): + cluster_name = config['cluster'] + + log.info('Setting up client 
nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, + name) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'auth', + 'get-or-create', name, + 'mon', 'allow *', + 'osd', 'allow *', + 'mds', 'allow *', + 'mgr', 'allow *', + ], + stdout=StringIO(), + ) + keyring = r.stdout.getvalue() + remote.sudo_write_file(client_keyring, keyring, mode='0644') + yield + + +@contextlib.contextmanager +def ceph_initial(): + try: + yield + finally: + log.info('Teardown complete') + + +## public methods +@contextlib.contextmanager +def stop(ctx, config): + """ + Stop ceph daemons + + For example:: + tasks: + - ceph.stop: [mds.*] + + tasks: + - ceph.stop: [osd.0, osd.2] + + tasks: + - ceph.stop: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list( + config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + clusters.add(cluster) + +# for cluster in clusters: +# ctx.ceph[cluster].watchdog.stop() +# ctx.ceph[cluster].watchdog.join() + + yield + + +def shell(ctx, config): + """ + Execute (shell) commands + """ + cluster_name = config.get('cluster', 'ceph') + + args = [] + for k in config.pop('env', []): + args.extend(['-e', k + '=' + ctx.config.get(k, '')]) + for k in config.pop('volumes', []): + args.extend(['-v', k]) + + if 'all-roles' in config and len(config) == 1: + a = config['all-roles'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) + elif 'all-hosts' in config and len(config) == 1: + a = config['all-hosts'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if id_.startswith('host.')) + + for role, cmd in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + if isinstance(cmd, list): + for c in cmd: + _shell(ctx, cluster_name, remote, + ['bash', '-c', subst_vip(ctx, c)], + extra_cephadm_args=args) + else: + assert isinstance(cmd, str) + _shell(ctx, cluster_name, remote, + ['bash', '-ex', '-c', subst_vip(ctx, cmd)], + extra_cephadm_args=args) + + +def apply(ctx, config): + """ + Apply spec + + tasks: + - cephadm.apply: + specs: + - service_type: rgw + service_id: foo + spec: + rgw_frontend_port: 8000 + - service_type: rgw + service_id: bar + spec: + rgw_frontend_port: 9000 + zone: bar + realm: asdf + + """ + cluster_name = config.get('cluster', 'ceph') + + specs = config.get('specs', []) + y = subst_vip(ctx, yaml.dump_all(specs)) + + log.info(f'Applying spec(s):\n{y}') + _shell( + ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'orch', 'apply', '-i', '-'], + stdin=y, + ) + + +def wait_for_service(ctx, config): + """ + Wait for a service to be fully started + + tasks: + - cephadm.wait_for_service: + service: rgw.foo + timeout: 60 # defaults to 300 + + """ + cluster_name = config.get('cluster', 'ceph') + timeout = config.get('timeout', 300) + service = config.get('service') + assert service + + log.info( + f'Waiting 
for {cluster_name} service {service} to start (timeout {timeout})...' + ) + with contextutil.safe_while(sleep=1, tries=timeout) as proceed: + while proceed(): + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=ctx.ceph[cluster_name].bootstrap_remote, + args=[ + 'ceph', 'orch', 'ls', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + svc = None + for s in j: + if s['service_name'] == service: + svc = s + break + if svc: + log.info( + f"{service} has {s['status']['running']}/{s['status']['size']}" + ) + if s['status']['running'] == s['status']['size']: + break + + +@contextlib.contextmanager +def tweaked_option(ctx, config): + """ + set an option, and then restore it with its original value + + Note, due to the way how tasks are executed/nested, it's not suggested to + use this method as a standalone task. otherwise, it's likely that it will + restore the tweaked option at the /end/ of 'tasks' block. + """ + saved_options = {} + # we can complicate this when necessary + options = ['mon-health-to-clog'] + type_, id_ = 'mon', '*' + cluster = config.get('cluster', 'ceph') + manager = ctx.managers[cluster] + if id_ == '*': + get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_)) + else: + get_from = id_ + for option in options: + if option not in config: + continue + value = 'true' if config[option] else 'false' + option = option.replace('-', '_') + old_value = manager.get_config(type_, get_from, option) + if value != old_value: + saved_options[option] = old_value + manager.inject_args(type_, id_, option, value) + yield + for option, value in saved_options.items(): + manager.inject_args(type_, id_, option, value) + + +@contextlib.contextmanager +def restart(ctx, config): + """ + restart ceph daemons + + For example:: + tasks: + - ceph.restart: [all] + + For example:: + tasks: + - ceph.restart: [osd.0, mon.1, mds.*] + + or:: + + tasks: + - ceph.restart: + daemons: [osd.0, mon.1] + wait-for-healthy: false + wait-for-osds-up: true + + :param ctx: Context + :param config: Configuration + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list( + config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + log.info('daemons %s' % daemons) + with tweaked_option(ctx, config): + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + d = ctx.daemons.get_daemon(type_, id_, cluster) + assert d, 'daemon %s does not exist' % role + d.stop() + if type_ == 'osd': + ctx.managers[cluster].mark_down_osd(id_) + d.restart() + clusters.add(cluster) + + if config.get('wait-for-healthy', True): + for cluster in clusters: + healthy(ctx=ctx, config=dict(cluster=cluster)) + if config.get('wait-for-osds-up', False): + for cluster in clusters: + ctx.managers[cluster].wait_for_all_osds_up() + yield + + +@contextlib.contextmanager +def distribute_config_and_admin_keyring(ctx, config): + """ + Distribute a sufficient config and keyring for clients + """ + cluster_name = config['cluster'] + log.info('Distributing (final) config and client.admin keyring...') + for remote, roles in ctx.cluster.remotes.items(): + remote.write_file( + '/etc/ceph/{}.conf'.format(cluster_name), + ctx.ceph[cluster_name].config_file, + sudo=True) + remote.write_file( + path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + data=ctx.ceph[cluster_name].admin_keyring, + sudo=True) + try: + yield + finally: + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + 
'/etc/ceph/{}.conf'.format(cluster_name), + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ]) + + +@contextlib.contextmanager +def crush_setup(ctx, config): + cluster_name = config['cluster'] + + profile = config.get('crush_tunables', 'default') + log.info('Setting crush tunables to %s', profile) + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['ceph', 'osd', 'crush', 'tunables', profile]) + yield + + +@contextlib.contextmanager +def create_rbd_pool(ctx, config): + if config.get('create_rbd_pool', False): + cluster_name = config['cluster'] + log.info('Waiting for OSDs to come up') + teuthology.wait_until_osds_up( + ctx, + cluster=ctx.cluster, + remote=ctx.ceph[cluster_name].bootstrap_remote, + ceph_cluster=cluster_name, + ) + log.info('Creating RBD pool') + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'create', 'rbd', '8']) + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ]) + yield + + +@contextlib.contextmanager +def _bypass(): + yield + + +@contextlib.contextmanager +def initialize_config(ctx, config): + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + + ctx.ceph[cluster_name].thrashers = [] + # fixme: setup watchdog, ala ceph.py + + ctx.ceph[cluster_name].roleless = False # see below + + first_ceph_cluster = False + if not hasattr(ctx, 'daemons'): + first_ceph_cluster = True + + # cephadm mode? + if 'cephadm_mode' not in config: + config['cephadm_mode'] = 'root' + assert config['cephadm_mode'] in ['root', 'cephadm-package'] + if config['cephadm_mode'] == 'root': + ctx.cephadm = testdir + '/cephadm' + else: + ctx.cephadm = 'cephadm' # in the path + + if first_ceph_cluster: + # FIXME: this is global for all clusters + ctx.daemons = DaemonGroup( + use_cephadm=ctx.cephadm) + + # uuid + fsid = str(uuid.uuid1()) + log.info('Cluster fsid is %s' % fsid) + ctx.ceph[cluster_name].fsid = fsid + + # mon ips + log.info('Choosing monitor IPs and ports...') + remotes_and_roles = ctx.cluster.remotes.items() + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + + if config.get('roleless', False): + # mons will be named after hosts + first_mon = None + max_mons = config.get('max_mons', 5) + for remote, _ in remotes_and_roles: + ctx.cluster.remotes[remote].append('mon.' + remote.shortname) + if not first_mon: + first_mon = remote.shortname + bootstrap_remote = remote + max_mons -= 1 + if not max_mons: + break + log.info('No mon roles; fabricating mons') + + roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()] + + ctx.ceph[cluster_name].mons = get_mons( + roles, ips, cluster_name, + mon_bind_msgr2=config.get('mon_bind_msgr2', True), + mon_bind_addrvec=config.get('mon_bind_addrvec', True), + ) + log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons) + + if config.get('roleless', False): + ctx.ceph[cluster_name].roleless = True + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = 'mon.' 
+ first_mon + else: + first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0] + _, _, first_mon = teuthology.split_role(first_mon_role) + (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys() + log.info('First mon is mon.%s on %s' % (first_mon, + bootstrap_remote.shortname)) + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = first_mon_role + + others = ctx.cluster.remotes[bootstrap_remote] + mgrs = sorted([r for r in others + if teuthology.is_type('mgr', cluster_name)(r)]) + if not mgrs: + raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon) + _, _, first_mgr = teuthology.split_role(mgrs[0]) + log.info('First mgr is %s' % (first_mgr)) + ctx.ceph[cluster_name].first_mgr = first_mgr + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy ceph cluster using cephadm + + For example, teuthology.yaml can contain the 'defaults' section: + + defaults: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + + Using overrides makes it possible to customize it per run. + The equivalent 'overrides' section looks like: + + overrides: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + registry-login: + url: registry-url + username: registry-user + password: registry-password + + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + teuthology.deep_merge(config, overrides.get('cephadm', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'ceph'): + ctx.ceph = {} + if 'cluster' not in config: + config['cluster'] = 'ceph' + cluster_name = config['cluster'] + if cluster_name not in ctx.ceph: + ctx.ceph[cluster_name] = argparse.Namespace() + ctx.ceph[cluster_name].bootstrapped = False + + # image + teuth_defaults = teuth_config.get('defaults', {}) + cephadm_defaults = teuth_defaults.get('cephadm', {}) + containers_defaults = cephadm_defaults.get('containers', {}) + container_image_name = containers_defaults.get('image', None) + + containers = config.get('containers', {}) + container_image_name = containers.get('image', container_image_name) + + if not hasattr(ctx.ceph[cluster_name], 'image'): + ctx.ceph[cluster_name].image = config.get('image') + ref = ctx.config.get("branch", "main") + if not ctx.ceph[cluster_name].image: + if not container_image_name: + raise Exception("Configuration error occurred. " + "The 'image' value is undefined for 'cephadm' task. 
" + "Please provide corresponding options in the task's " + "config, task 'overrides', or teuthology 'defaults' " + "section.") + sha1 = config.get('sha1') + flavor = config.get('flavor', 'default') + + if sha1: + if flavor == "crimson": + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor + else: + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + ref = sha1 + else: + # fall back to using the branch value + ctx.ceph[cluster_name].image = container_image_name + ':' + ref + log.info('Cluster image is %s' % ctx.ceph[cluster_name].image) + + + with contextutil.nested( + #if the cluster is already bootstrapped bypass corresponding methods + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else initialize_config(ctx=ctx, config=config), + lambda: ceph_initial(), + lambda: normalize_hostnames(ctx=ctx), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else download_cephadm(ctx=ctx, config=config, ref=ref), + lambda: ceph_log(ctx=ctx, config=config), + lambda: ceph_crash(ctx=ctx, config=config), + lambda: pull_image(ctx=ctx, config=config), + lambda: _bypass() if not (config.get('use-ca-signed-key', False)) \ + else setup_ca_signed_keys(ctx, config), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else ceph_bootstrap(ctx, config), + lambda: crush_setup(ctx=ctx, config=config), + lambda: ceph_mons(ctx=ctx, config=config), + lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config), + lambda: ceph_mgrs(ctx=ctx, config=config), + lambda: ceph_osds(ctx=ctx, config=config), + lambda: ceph_mdss(ctx=ctx, config=config), + lambda: cephfs_setup(ctx=ctx, config=config), + lambda: ceph_rgw(ctx=ctx, config=config), + lambda: ceph_iscsi(ctx=ctx, config=config), + lambda: ceph_monitoring('prometheus', ctx=ctx, config=config), + lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config), + lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config), + lambda: ceph_monitoring('grafana', ctx=ctx, config=config), + lambda: ceph_clients(ctx=ctx, config=config), + lambda: create_rbd_pool(ctx=ctx, config=config), + ): + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=config) + + log.info('Setup complete, yielding') + yield + + finally: + log.info('Teardown begin') + diff --git a/qa/tasks/cephadm_cases/__init__.py b/qa/tasks/cephadm_cases/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/cephadm_cases/__init__.py diff --git a/qa/tasks/cephadm_cases/test_cli.py b/qa/tasks/cephadm_cases/test_cli.py new file mode 100644 index 000000000..ca40a8cdb --- /dev/null +++ b/qa/tasks/cephadm_cases/test_cli.py @@ -0,0 +1,73 @@ +import json +import logging +import time + +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +class TestCephadmCLI(MgrTestCase): + def _cmd(self, *args) -> str: + assert self.mgr_cluster is not None + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _orch_cmd(self, *args) -> str: + return self._cmd("orch", *args) + + def setUp(self): + super(TestCephadmCLI, self).setUp() + + def test_yaml(self): + """ + to prevent oddities like + + >>> import yaml + ... from collections import OrderedDict + ... 
assert yaml.dump(OrderedDict()) == '!!python/object/apply:collections.OrderedDict\\n- []\\n' + """ + out = self._orch_cmd('device', 'ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('host', 'ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('ps', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('status', '--format', 'yaml') + self.assertNotIn('!!python', out) + + def test_pause(self): + self._orch_cmd('pause') + self.wait_for_health('CEPHADM_PAUSED', 60) + self._orch_cmd('resume') + self.wait_for_health_clear(60) + + def test_daemon_restart(self): + self._orch_cmd('daemon', 'stop', 'osd.0') + self.wait_for_health('OSD_DOWN', 60) + with safe_while(sleep=2, tries=30) as proceed: + while proceed(): + j = json.loads(self._orch_cmd('ps', '--format', 'json')) + d = {d['daemon_name']: d for d in j} + if d['osd.0']['status_desc'] != 'running': + break + time.sleep(5) + self._orch_cmd('daemon', 'start', 'osd.0') + self.wait_for_health_clear(120) + self._orch_cmd('daemon', 'restart', 'osd.0') + + def test_device_ls_wide(self): + self._orch_cmd('device', 'ls', '--wide') + + def test_cephfs_mirror(self): + self._orch_cmd('apply', 'cephfs-mirror') + self.wait_until_true(lambda: 'cephfs-mirror' in self._orch_cmd('ps'), 60) + self.wait_for_health_clear(60) + self._orch_cmd('rm', 'cephfs-mirror') + self.wait_until_true(lambda: 'cephfs-mirror' not in self._orch_cmd('ps'), 60) diff --git a/qa/tasks/cephadm_cases/test_cli_mon.py b/qa/tasks/cephadm_cases/test_cli_mon.py new file mode 100644 index 000000000..72aee094e --- /dev/null +++ b/qa/tasks/cephadm_cases/test_cli_mon.py @@ -0,0 +1,71 @@ +import json +import logging + +from tasks.mgr.mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestCephadmCLI(MgrTestCase): + + APPLY_MON_PERIOD = 60 + + def _cmd(self, *args) -> str: + assert self.mgr_cluster is not None + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _orch_cmd(self, *args) -> str: + return self._cmd("orch", *args) + + def setUp(self): + super(TestCephadmCLI, self).setUp() + + def _create_and_write_pool(self, pool_name): + # Create new pool and write to it, simulating a small workload. + self.mgr_cluster.mon_manager.create_pool(pool_name) + args = [ + "rados", "-p", pool_name, "bench", "30", "write", "-t", "16"] + self.mgr_cluster.admin_remote.run(args=args, wait=True) + + def _get_quorum_size(self) -> int: + # Evaluate if the quorum size of the cluster is correct. + # log the quorum_status before reducing the monitors + retstr = self._cmd('quorum_status') + log.info("test_apply_mon._check_quorum_size: %s" % json.dumps(retstr, indent=2)) + quorum_size = len(json.loads(retstr)['quorum']) # get quorum size + return quorum_size + + def _check_no_crashes(self): + # Evaluate if there are no crashes + # log the crash + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + log.info("test_apply_mon._check_no_crashes: %s" % retstr) + self.assertEqual(0, len(retstr)) # check if there are no crashes + + def test_apply_mon_three(self): + # Evaluating the process of reducing the number of + # monitors from 5 to 3 and increasing the number of + # monitors from 3 to 5, using the `ceph orch apply mon <num>` command. 
+ + self.wait_until_equal(lambda : self._get_quorum_size(), 5, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._orch_cmd('apply', 'mon', '3') # reduce the monitors from 5 -> 3 + + self._create_and_write_pool('test_pool1') + + self.wait_until_equal(lambda : self._get_quorum_size(), 3, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._check_no_crashes() + + self._orch_cmd('apply', 'mon', '5') # increase the monitors from 3 -> 5 + + self._create_and_write_pool('test_pool2') + + self.wait_until_equal(lambda : self._get_quorum_size(), 5, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._check_no_crashes()
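test_apply_mon_three drives the whole check through _get_quorum_size() plus the wait_until_equal() polling helper. A self-contained sketch of that shape, using an illustrative canned quorum_status document and timeout rather than real monitor output:

    import json
    import time

    def get_quorum_size(quorum_status_json):
        """Count the ranks listed under 'quorum' in `ceph quorum_status` output."""
        return len(json.loads(quorum_status_json)['quorum'])

    def wait_until_equal(get_fn, expect_val, timeout, period=10):
        """Poll get_fn() until it returns expect_val or the timeout elapses."""
        elapsed = 0
        while get_fn() != expect_val:
            if elapsed >= timeout:
                raise TimeoutError('still %r after %ds, wanted %r'
                                   % (get_fn(), elapsed, expect_val))
            time.sleep(period)
            elapsed += period

    # illustrative use with a canned status document:
    #   status = '{"quorum": [0, 1, 2]}'
    #   assert get_quorum_size(status) == 3
    #   wait_until_equal(lambda: get_quorum_size(status), 3, timeout=60)
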
\ No newline at end of file diff --git a/qa/tasks/cephfs/__init__.py b/qa/tasks/cephfs/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/cephfs/__init__.py diff --git a/qa/tasks/cephfs/caps_helper.py b/qa/tasks/cephfs/caps_helper.py new file mode 100644 index 000000000..ac9bc4401 --- /dev/null +++ b/qa/tasks/cephfs/caps_helper.py @@ -0,0 +1,195 @@ +""" +Helper methods to test that MON and MDS caps are enforced properly. +""" +from os.path import join as os_path_join +from logging import getLogger + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +from teuthology.orchestra.run import Raw + + +log = getLogger(__name__) + + +class CapTester(CephFSTestCase): + """ + Test that MON and MDS caps are enforced. + + MDS caps are tested by exercising read-write permissions and MON caps are + tested using output of command "ceph fs ls". Besides, it provides + write_test_files() which creates test files at the given path on CephFS + mounts passed to it. + + USAGE: Call write_test_files() method at the beginning of the test and + once the caps that needs to be tested are assigned to the client and + CephFS be remount for caps to effective, call run_cap_tests(), + run_mon_cap_tests() or run_mds_cap_tests() as per the need. + """ + + def write_test_files(self, mounts, testpath=''): + """ + Exercising 'r' and 'w' access levels on a file on CephFS mount is + pretty routine across all tests for caps. Adding to method to write + that file will reduce clutter in these tests. + + This methods writes a fixed data in a file with a fixed name located + at the path passed in testpath for the given list of mounts. If + testpath is empty, the file is created at the root of the CephFS. + """ + dirname, filename = 'testdir', 'testfile' + self.test_set = [] + # XXX: The reason behind testpath[1:] below is that the testpath is + # supposed to contain a path inside CephFS (which might be passed as + # an absolute path). os.path.join() deletes all previous path + # components when it encounters a path component starting with '/'. + # Deleting the first '/' from the string in testpath ensures that + # previous path components are not deleted by os.path.join(). + if testpath: + testpath = testpath[1:] if testpath[0] == '/' else testpath + # XXX: passing just '/' screw up os.path.join() ahead. + if testpath == '/': + testpath = '' + + for mount_x in mounts: + log.info(f'creating test file on FS {mount_x.cephfs_name} ' + f'mounted at {mount_x.mountpoint}...') + dirpath = os_path_join(mount_x.hostfs_mntpt, testpath, dirname) + mount_x.run_shell(f'mkdir {dirpath}') + filepath = os_path_join(dirpath, filename) + # XXX: the reason behind adding filepathm, cephfs_name and both + # mntpts is to avoid a test bug where we mount cephfs1 but what + # ends up being mounted cephfs2. since filepath and filedata are + # identical, how would tests figure otherwise that they are + # accessing the right filename but on wrong CephFS. 
+ filedata = (f'filepath = {filepath}\n' + f'cephfs_name = {mount_x.cephfs_name}\n' + f'cephfs_mntpt = {mount_x.cephfs_mntpt}\n' + f'hostfs_mntpt = {mount_x.hostfs_mntpt}') + mount_x.write_file(filepath, filedata) + self.test_set.append((mount_x, filepath, filedata)) + log.info('test file created at {path} with data "{data}.') + + def run_cap_tests(self, perm, mntpt=None): + # TODO + #self.run_mon_cap_tests() + self.run_mds_cap_tests(perm, mntpt=mntpt) + + def _get_fsnames_from_moncap(self, moncap): + fsnames = [] + while moncap.find('fsname=') != -1: + fsname_first_char = moncap.index('fsname=') + len('fsname=') + + if ',' in moncap: + last = moncap.index(',') + fsname = moncap[fsname_first_char : last] + moncap = moncap.replace(moncap[0 : last+1], '') + else: + fsname = moncap[fsname_first_char : ] + moncap = moncap.replace(moncap[0 : ], '') + + fsnames.append(fsname) + + return fsnames + + def run_mon_cap_tests(self, def_fs, client_id): + """ + Check that MON cap is enforced for a client by searching for a Ceph + FS name in output of cmd "fs ls" executed with that client's caps. + + def_fs stands for default FS on Ceph cluster. + """ + get_cluster_cmd_op = def_fs.mon_manager.raw_cluster_cmd + + keyring = get_cluster_cmd_op(args=f'auth get client.{client_id}') + + moncap = None + for line in keyring.split('\n'): + if 'caps mon' in line: + moncap = line[line.find(' = "') + 4 : -1] + break + else: + raise RuntimeError('run_mon_cap_tests(): mon cap not found in ' + 'keyring. keyring -\n' + keyring) + + keyring_path = def_fs.admin_remote.mktemp(data=keyring) + + fsls = get_cluster_cmd_op( + args=f'fs ls --id {client_id} -k {keyring_path}') + log.info(f'output of fs ls cmd run by client.{client_id} -\n{fsls}') + + if 'fsname=' not in moncap: + log.info('no FS name is mentioned in moncap, client has ' + 'permission to list all files. moncap -\n{moncap}') + log.info('testing for presence of all FS names in output of ' + '"fs ls" command run by client.') + + fsls_admin = get_cluster_cmd_op(args='fs ls') + log.info('output of fs ls cmd run by admin -\n{fsls_admin}') + + self.assertEqual(fsls, fsls_admin) + return + + log.info('FS names are mentioned in moncap. moncap -\n{moncap}') + log.info('testing for presence of these FS names in output of ' + '"fs ls" command run by client.') + for fsname in self._get_fsnames_from_moncap(moncap): + self.assertIn('name: ' + fsname, fsls) + + def run_mds_cap_tests(self, perm, mntpt=None): + """ + Run test for read perm and, for write perm, run positive test if it + is present and run negative test if not. + """ + # XXX: mntpt is path inside cephfs that serves as root for current + # mount. Therefore, this path must me deleted from self.filepaths. 
+ # Example - + # orignal path: /mnt/cephfs_x/dir1/dir2/testdir + # cephfs dir serving as root for current mnt: /dir1/dir2 + # therefore, final path: /mnt/cephfs_x//testdir + if mntpt: + self.test_set = [(x, y.replace(mntpt, ''), z) for x, y, z in \ + self.test_set] + + self.conduct_pos_test_for_read_caps() + + if perm == 'rw': + self.conduct_pos_test_for_write_caps() + elif perm == 'r': + self.conduct_neg_test_for_write_caps() + else: + raise RuntimeError(f'perm = {perm}\nIt should be "r" or "rw".') + + def conduct_pos_test_for_read_caps(self): + for mount, path, data in self.test_set: + log.info(f'test read perm: read file {path} and expect data ' + f'"{data}"') + contents = mount.read_file(path) + self.assertEqual(data, contents) + log.info(f'read perm was tested successfully: "{data}" was ' + f'successfully read from path {path}') + + def conduct_pos_test_for_write_caps(self): + for mount, path, data in self.test_set: + log.info(f'test write perm: try writing data "{data}" to ' + f'file {path}.') + mount.write_file(path=path, data=data) + contents = mount.read_file(path=path) + self.assertEqual(data, contents) + log.info(f'write perm was tested was successfully: data ' + f'"{data}" was successfully written to file "{path}".') + + def conduct_neg_test_for_write_caps(self, sudo_write=False): + possible_errmsgs = ('permission denied', 'operation not permitted') + cmdargs = ['echo', 'some random data', Raw('|')] + cmdargs += ['sudo', 'tee'] if sudo_write else ['tee'] + + # don't use data, cmd args to write are set already above. + for mount, path, data in self.test_set: + log.info('test absence of write perm: expect failure ' + f'writing data to file {path}.') + cmdargs.append(path) + mount.negtestcmd(args=cmdargs, retval=1, errmsgs=possible_errmsgs) + cmdargs.pop(-1) + log.info('absence of write perm was tested successfully: ' + f'failed to be write data to file {path}.') diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py new file mode 100644 index 000000000..d2688929c --- /dev/null +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -0,0 +1,442 @@ +import json +import logging +import os +import re + +from shlex import split as shlex_split + +from tasks.ceph_test_case import CephTestCase + +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +def classhook(m): + def dec(cls): + getattr(cls, m)() + return cls + return dec + +def for_teuthology(f): + """ + Decorator that adds an "is_for_teuthology" attribute to the wrapped function + """ + f.is_for_teuthology = True + return f + + +def needs_trimming(f): + """ + Mark fn as requiring a client capable of trimming its cache (i.e. 
for ceph-fuse + this means it needs to be able to run as root, currently) + """ + f.needs_trimming = True + return f + + +class MountDetails(): + + def __init__(self, mntobj): + self.client_id = mntobj.client_id + self.client_keyring_path = mntobj.client_keyring_path + self.client_remote = mntobj.client_remote + self.cephfs_name = mntobj.cephfs_name + self.cephfs_mntpt = mntobj.cephfs_mntpt + self.hostfs_mntpt = mntobj.hostfs_mntpt + + def restore(self, mntobj): + mntobj.client_id = self.client_id + mntobj.client_keyring_path = self.client_keyring_path + mntobj.client_remote = self.client_remote + mntobj.cephfs_name = self.cephfs_name + mntobj.cephfs_mntpt = self.cephfs_mntpt + mntobj.hostfs_mntpt = self.hostfs_mntpt + + +class CephFSTestCase(CephTestCase): + """ + Test case for Ceph FS, requires caller to populate Filesystem and Mounts, + into the fs, mount_a, mount_b class attributes (setting mount_b is optional) + + Handles resetting the cluster under test between tests. + """ + + # FIXME weird explicit naming + mount_a = None + mount_b = None + recovery_mount = None + + # Declarative test requirements: subclasses should override these to indicate + # their special needs. If not met, tests will be skipped. + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + REQUIRE_ONE_CLIENT_REMOTE = False + + # Whether to create the default filesystem during setUp + REQUIRE_FILESYSTEM = True + + # create a backup filesystem if required. + # required REQUIRE_FILESYSTEM enabled + REQUIRE_BACKUP_FILESYSTEM = False + + LOAD_SETTINGS = [] # type: ignore + + def _save_mount_details(self): + """ + XXX: Tests may change details of mount objects, so let's stash them so + that these details are restored later to ensure smooth setUps and + tearDowns for upcoming tests. + """ + self._orig_mount_details = [MountDetails(m) for m in self.mounts] + log.info(self._orig_mount_details) + + def _remove_blocklist(self): + # In case anything is in the OSD blocklist list, clear it out. This is to avoid + # the OSD map changing in the background (due to blocklist expiry) while tests run. + try: + self.mds_cluster.mon_manager.run_cluster_cmd(args="osd blocklist clear") + except CommandFailedError: + # Fallback for older Ceph cluster + try: + blocklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", + "dump", "--format=json-pretty"))['blocklist'] + log.info(f"Removing {len(blocklist)} blocklist entries") + for addr, blocklisted_at in blocklist.items(): + self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blocklist", "rm", addr) + except KeyError: + # Fallback for more older Ceph clusters, who will use 'blacklist' instead. 
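                # (such clusters report the entries under the 'blacklist' key
                # of "osd dump" and accept "osd blacklist rm" instead)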
+ blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", + "dump", "--format=json-pretty"))['blacklist'] + log.info(f"Removing {len(blacklist)} blacklist entries") + for addr, blocklisted_at in blacklist.items(): + self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr) + + def setUp(self): + super(CephFSTestCase, self).setUp() + + self.config_set('mon', 'mon_allow_pool_delete', True) + + if len(self.mds_cluster.mds_ids) < self.MDSS_REQUIRED: + self.skipTest("Only have {0} MDSs, require {1}".format( + len(self.mds_cluster.mds_ids), self.MDSS_REQUIRED + )) + + if len(self.mounts) < self.CLIENTS_REQUIRED: + self.skipTest("Only have {0} clients, require {1}".format( + len(self.mounts), self.CLIENTS_REQUIRED + )) + + if self.REQUIRE_ONE_CLIENT_REMOTE: + if self.mounts[0].client_remote.hostname in self.mds_cluster.get_mds_hostnames(): + self.skipTest("Require first client to be on separate server from MDSs") + + # Create friendly mount_a, mount_b attrs + for i in range(0, self.CLIENTS_REQUIRED): + setattr(self, "mount_{0}".format(chr(ord('a') + i)), self.mounts[i]) + + self.mds_cluster.clear_firewall() + + # Unmount all clients, we are about to blow away the filesystem + for mount in self.mounts: + if mount.is_mounted(): + mount.umount_wait(force=True) + self._save_mount_details() + + # To avoid any issues with e.g. unlink bugs, we destroy and recreate + # the filesystem rather than just doing a rm -rf of files + self.mds_cluster.delete_all_filesystems() + self.mds_cluster.mds_restart() # to reset any run-time configs, etc. + self.fs = None # is now invalid! + self.backup_fs = None + self.recovery_fs = None + + self._remove_blocklist() + + client_mount_ids = [m.client_id for m in self.mounts] + # In case there were any extra auth identities around from a previous + # test, delete them + for entry in self.auth_list(): + ent_type, ent_id = entry['entity'].split(".") + if ent_type == "client" and ent_id not in client_mount_ids and not (ent_id == "admin" or ent_id[:6] == 'mirror'): + self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity']) + + if self.REQUIRE_FILESYSTEM: + self.fs = self.mds_cluster.newfs(create=True) + + # In case some test messed with auth caps, reset them + for client_id in client_mount_ids: + cmd = ['auth', 'caps', f'client.{client_id}', 'mon','allow r', + 'osd', f'allow rw tag cephfs data={self.fs.name}', + 'mds', 'allow'] + + if self.run_cluster_cmd_result(cmd) == 0: + break + + cmd[1] = 'add' + if self.run_cluster_cmd_result(cmd) != 0: + raise RuntimeError(f'Failed to create new client {cmd[2]}') + + # wait for ranks to become active + self.fs.wait_for_daemons() + + # Mount the requested number of clients + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].mount_wait() + + if self.REQUIRE_BACKUP_FILESYSTEM: + if not self.REQUIRE_FILESYSTEM: + self.skipTest("backup filesystem requires a primary filesystem as well") + self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set', + 'enable_multiple', 'true', + '--yes-i-really-mean-it') + self.backup_fs = self.mds_cluster.newfs(name="backup_fs") + self.backup_fs.wait_for_daemons() + + # Load an config settings of interest + for setting in self.LOAD_SETTINGS: + setattr(self, setting, float(self.fs.mds_asok( + ['config', 'get', setting], list(self.mds_cluster.mds_ids)[0] + )[setting])) + + self.configs_set = set() + + def tearDown(self): + self.mds_cluster.clear_firewall() + for m in self.mounts: + m.teardown() + + # To prevent failover messages during Unwind of ceph 
task + self.mds_cluster.delete_all_filesystems() + + for m, md in zip(self.mounts, self._orig_mount_details): + md.restore(m) + + for subsys, key in self.configs_set: + self.mds_cluster.clear_ceph_conf(subsys, key) + + return super(CephFSTestCase, self).tearDown() + + def set_conf(self, subsys, key, value): + self.configs_set.add((subsys, key)) + self.mds_cluster.set_ceph_conf(subsys, key, value) + + def auth_list(self): + """ + Convenience wrapper on "ceph auth ls" + """ + return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd( + "auth", "ls", "--format=json-pretty" + ))['auth_dump'] + + def assert_session_count(self, expected, ls_data=None, mds_id=None): + if ls_data is None: + ls_data = self.fs.mds_asok(['session', 'ls'], mds_id=mds_id) + + alive_count = len([s for s in ls_data if s['state'] != 'killing']) + + self.assertEqual(expected, alive_count, "Expected {0} sessions, found {1}".format( + expected, alive_count + )) + + def assert_session_state(self, client_id, expected_state): + self.assertEqual( + self._session_by_id( + self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'], + expected_state) + + def get_session_data(self, client_id): + return self._session_by_id(client_id) + + def _session_list(self): + ls_data = self.fs.mds_asok(['session', 'ls']) + ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']] + return ls_data + + def get_session(self, client_id, session_ls=None): + if session_ls is None: + session_ls = self.fs.mds_asok(['session', 'ls']) + + return self._session_by_id(session_ls)[client_id] + + def _session_by_id(self, session_ls): + return dict([(s['id'], s) for s in session_ls]) + + def perf_dump(self, rank=None, status=None): + return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status) + + def wait_until_evicted(self, client_id, timeout=30): + def is_client_evicted(): + ls = self._session_list() + for s in ls: + if s['id'] == client_id: + return False + return True + self.wait_until_true(is_client_evicted, timeout) + + def wait_for_daemon_start(self, daemon_ids=None): + """ + Wait until all the daemons appear in the FSMap, either assigned + MDS ranks or in the list of standbys + """ + def get_daemon_names(): + return [info['name'] for info in self.mds_cluster.status().get_all()] + + if daemon_ids is None: + daemon_ids = self.mds_cluster.mds_ids + + try: + self.wait_until_true( + lambda: set(daemon_ids) & set(get_daemon_names()) == set(daemon_ids), + timeout=30 + ) + except RuntimeError: + log.warning("Timeout waiting for daemons {0}, while we have {1}".format( + daemon_ids, get_daemon_names() + )) + raise + + def delete_mds_coredump(self, daemon_id): + # delete coredump file, otherwise teuthology.internal.coredump will + # catch it later and treat it as a failure. 
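        # kernel.core_pattern is either a path template (for example
        # "/var/crash/%e.%p.core"), from which the directory is derived below,
        # or it starts with '|' (cores piped to a program), in which case the
        # cleanup is skipped. A rough local equivalent of the lookup, assuming
        # a plain shell rather than a teuthology remote, would be:
        #
        #     import os, subprocess
        #     pattern = subprocess.check_output(
        #         ["sysctl", "-n", "kernel.core_pattern"], text=True).strip()
        #     core_dir = "" if pattern.startswith("|") else os.path.dirname(pattern)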
+ core_pattern = self.mds_cluster.mds_daemons[daemon_id].remote.sh( + "sudo sysctl -n kernel.core_pattern") + core_dir = os.path.dirname(core_pattern.strip()) + if core_dir: # Non-default core_pattern with a directory in it + # We have seen a core_pattern that looks like it's from teuthology's coredump + # task, so proceed to clear out the core file + if core_dir[0] == '|': + log.info("Piped core dumps to program {0}, skip cleaning".format(core_dir[1:])) + return; + + log.info("Clearing core from directory: {0}".format(core_dir)) + + # Verify that we see the expected single coredump + ls_output = self.mds_cluster.mds_daemons[daemon_id].remote.sh([ + "cd", core_dir, run.Raw('&&'), + "sudo", "ls", run.Raw('|'), "sudo", "xargs", "file" + ]) + cores = [l.partition(":")[0] + for l in ls_output.strip().split("\n") + if re.match(r'.*ceph-mds.* -i +{0}'.format(daemon_id), l)] + + log.info("Enumerated cores: {0}".format(cores)) + self.assertEqual(len(cores), 1) + + log.info("Found core file {0}, deleting it".format(cores[0])) + + self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[ + "cd", core_dir, run.Raw('&&'), "sudo", "rm", "-f", cores[0] + ]) + else: + log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)") + + def _get_subtrees(self, status=None, rank=None, path=None): + if path is None: + path = "/" + try: + with contextutil.safe_while(sleep=1, tries=3) as proceed: + while proceed(): + try: + if rank == "all": + subtrees = [] + for r in self.fs.get_ranks(status=status): + s = self.fs.rank_asok(["get", "subtrees"], status=status, rank=r['rank']) + s = filter(lambda s: s['auth_first'] == r['rank'] and s['auth_second'] == -2, s) + subtrees += s + else: + subtrees = self.fs.rank_asok(["get", "subtrees"], status=status, rank=rank) + subtrees = filter(lambda s: s['dir']['path'].startswith(path), subtrees) + return list(subtrees) + except CommandFailedError as e: + # Sometimes we get transient errors + if e.exitstatus == 22: + pass + else: + raise + except contextutil.MaxWhileTries as e: + raise RuntimeError(f"could not get subtree state from rank {rank}") from e + + def _wait_subtrees(self, test, status=None, rank=None, timeout=30, sleep=2, action=None, path=None): + test = sorted(test) + try: + with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees]) + log.info("%s =?= %s", filtered, test) + if filtered == test: + # Confirm export_pin in output is correct: + for s in subtrees: + if s['export_pin_target'] >= 0: + self.assertTrue(s['export_pin_target'] == s['auth_first']) + return subtrees + if action is not None: + action() + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def _wait_until_scrub_complete(self, path="/", recursive=True, timeout=100): + out_json = self.fs.run_scrub(["start", path] + ["recursive"] if recursive else []) + if not self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"], + sleep=10, timeout=timeout): + log.info("timed out waiting for scrub to complete") + + def _wait_distributed_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + subtrees = list(filter(lambda s: s['distributed_ephemeral_pin'] == True 
and + s['auth_first'] == s['export_pin_target'], + subtrees)) + log.info(f"len={len(subtrees)} {subtrees}") + if len(subtrees) >= count: + return subtrees + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def _wait_random_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + subtrees = list(filter(lambda s: s['random_ephemeral_pin'] == True and + s['auth_first'] == s['export_pin_target'], + subtrees)) + log.info(f"len={len(subtrees)} {subtrees}") + if len(subtrees) >= count: + return subtrees + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def run_cluster_cmd(self, cmd): + if isinstance(cmd, str): + cmd = shlex_split(cmd) + return self.fs.mon_manager.raw_cluster_cmd(*cmd) + + def run_cluster_cmd_result(self, cmd): + if isinstance(cmd, str): + cmd = shlex_split(cmd) + return self.fs.mon_manager.raw_cluster_cmd_result(*cmd) + + def create_client(self, client_id, moncap=None, osdcap=None, mdscap=None): + if not (moncap or osdcap or mdscap): + if self.fs: + return self.fs.authorize(client_id, ('/', 'rw')) + else: + raise RuntimeError('no caps were passed and the default FS ' + 'is not created yet to allow client auth ' + 'for it.') + + cmd = ['auth', 'add', f'client.{client_id}'] + if moncap: + cmd += ['mon', moncap] + if osdcap: + cmd += ['osd', osdcap] + if mdscap: + cmd += ['mds', mdscap] + + self.run_cluster_cmd(cmd) + return self.run_cluster_cmd(f'auth get {self.client_name}') diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py new file mode 100644 index 000000000..777ba8249 --- /dev/null +++ b/qa/tasks/cephfs/filesystem.py @@ -0,0 +1,1712 @@ + +import json +import logging +from gevent import Greenlet +import os +import time +import datetime +import re +import errno +import random + +from io import BytesIO, StringIO +from errno import EBUSY + +from teuthology.exceptions import CommandFailedError +from teuthology import misc +from teuthology.nuke import clear_firewall +from teuthology.parallel import parallel +from teuthology import contextutil +from tasks.ceph_manager import write_conf +from tasks import ceph_manager + + +log = logging.getLogger(__name__) + + +DAEMON_WAIT_TIMEOUT = 120 +ROOT_INO = 1 + +class FileLayout(object): + def __init__(self, pool=None, pool_namespace=None, stripe_unit=None, stripe_count=None, object_size=None): + self.pool = pool + self.pool_namespace = pool_namespace + self.stripe_unit = stripe_unit + self.stripe_count = stripe_count + self.object_size = object_size + + @classmethod + def load_from_ceph(layout_str): + # TODO + pass + + def items(self): + if self.pool is not None: + yield ("pool", self.pool) + if self.pool_namespace: + yield ("pool_namespace", self.pool_namespace) + if self.stripe_unit is not None: + yield ("stripe_unit", self.stripe_unit) + if self.stripe_count is not None: + yield ("stripe_count", self.stripe_count) + if self.object_size is not None: + yield ("object_size", self.stripe_size) + +class ObjectNotFound(Exception): + def __init__(self, object_name): + self._object_name = object_name + + def __str__(self): + return "Object not found: '{0}'".format(self._object_name) + +class FSMissing(Exception): + def __init__(self, ident): + self.ident = ident + + def __str__(self): + return f"File 
system {self.ident} does not exist in the map" + +class FSStatus(object): + """ + Operations on a snapshot of the FSMap. + """ + def __init__(self, mon_manager, epoch=None): + self.mon = mon_manager + cmd = ["fs", "dump", "--format=json"] + if epoch is not None: + cmd.append(str(epoch)) + self.map = json.loads(self.mon.raw_cluster_cmd(*cmd)) + + def __str__(self): + return json.dumps(self.map, indent = 2, sort_keys = True) + + # Expose the fsmap for manual inspection. + def __getitem__(self, key): + """ + Get a field from the fsmap. + """ + return self.map[key] + + def get_filesystems(self): + """ + Iterator for all filesystems. + """ + for fs in self.map['filesystems']: + yield fs + + def get_all(self): + """ + Iterator for all the mds_info components in the FSMap. + """ + for info in self.map['standbys']: + yield info + for fs in self.map['filesystems']: + for info in fs['mdsmap']['info'].values(): + yield info + + def get_standbys(self): + """ + Iterator for all standbys. + """ + for info in self.map['standbys']: + yield info + + def get_fsmap(self, fscid): + """ + Get the fsmap for the given FSCID. + """ + for fs in self.map['filesystems']: + if fscid is None or fs['id'] == fscid: + return fs + raise FSMissing(fscid) + + def get_fsmap_byname(self, name): + """ + Get the fsmap for the given file system name. + """ + for fs in self.map['filesystems']: + if name is None or fs['mdsmap']['fs_name'] == name: + return fs + raise FSMissing(name) + + def get_replays(self, fscid): + """ + Get the standby:replay MDS for the given FSCID. + """ + fs = self.get_fsmap(fscid) + for info in fs['mdsmap']['info'].values(): + if info['state'] == 'up:standby-replay': + yield info + + def get_ranks(self, fscid): + """ + Get the ranks for the given FSCID. + """ + fs = self.get_fsmap(fscid) + for info in fs['mdsmap']['info'].values(): + if info['rank'] >= 0 and info['state'] != 'up:standby-replay': + yield info + + def get_damaged(self, fscid): + """ + Get the damaged ranks for the given FSCID. + """ + fs = self.get_fsmap(fscid) + return fs['mdsmap']['damaged'] + + def get_rank(self, fscid, rank): + """ + Get the rank for the given FSCID. + """ + for info in self.get_ranks(fscid): + if info['rank'] == rank: + return info + raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank)) + + def get_mds(self, name): + """ + Get the info for the given MDS name. + """ + for info in self.get_all(): + if info['name'] == name: + return info + return None + + def get_mds_addr(self, name): + """ + Return the instance addr as a string, like "10.214.133.138:6807\/10825" + """ + info = self.get_mds(name) + if info: + return info['addr'] + else: + log.warning(json.dumps(list(self.get_all()), indent=2)) # dump for debugging + raise RuntimeError("MDS id '{0}' not found in map".format(name)) + + def get_mds_addrs(self, name): + """ + Return the instance addr as a string, like "[10.214.133.138:6807 10.214.133.138:6808]" + """ + info = self.get_mds(name) + if info: + return [e['addr'] for e in info['addrs']['addrvec']] + else: + log.warn(json.dumps(list(self.get_all()), indent=2)) # dump for debugging + raise RuntimeError("MDS id '{0}' not found in map".format(name)) + + def get_mds_gid(self, gid): + """ + Get the info for the given MDS gid. + """ + for info in self.get_all(): + if info['gid'] == gid: + return info + return None + + def hadfailover(self, status): + """ + Compares two statuses for mds failovers. + Returns True if there is a failover. 
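        That is, some MDS gid present in the given (newer) status either does
        not appear in this (older) status or appears with a different
        incarnation.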
+ """ + for fs in status.map['filesystems']: + for info in fs['mdsmap']['info'].values(): + oldinfo = self.get_mds_gid(info['gid']) + if oldinfo is None or oldinfo['incarnation'] != info['incarnation']: + return True + #all matching + return False + +class CephCluster(object): + @property + def admin_remote(self): + first_mon = misc.get_first_mon(self._ctx, None) + (result,) = self._ctx.cluster.only(first_mon).remotes.keys() + return result + + def __init__(self, ctx) -> None: + self._ctx = ctx + self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager')) + + def get_config(self, key, service_type=None): + """ + Get config from mon by default, or a specific service if caller asks for it + """ + if service_type is None: + service_type = 'mon' + + service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0] + return self.json_asok(['config', 'get', key], service_type, service_id)[key] + + def set_ceph_conf(self, subsys, key, value): + if subsys not in self._ctx.ceph['ceph'].conf: + self._ctx.ceph['ceph'].conf[subsys] = {} + self._ctx.ceph['ceph'].conf[subsys][key] = value + write_conf(self._ctx) # XXX because we don't have the ceph task's config object, if they + # used a different config path this won't work. + + def clear_ceph_conf(self, subsys, key): + del self._ctx.ceph['ceph'].conf[subsys][key] + write_conf(self._ctx) + + def json_asok(self, command, service_type, service_id, timeout=None): + if timeout is None: + timeout = 300 + command.insert(0, '--format=json') + proc = self.mon_manager.admin_socket(service_type, service_id, command, timeout=timeout) + response_data = proc.stdout.getvalue().strip() + if len(response_data) > 0: + + def get_nonnumeric_values(value): + c = {"NaN": float("nan"), "Infinity": float("inf"), + "-Infinity": -float("inf")} + return c[value] + + j = json.loads(response_data.replace('inf', 'Infinity'), + parse_constant=get_nonnumeric_values) + pretty = json.dumps(j, sort_keys=True, indent=2) + log.debug(f"_json_asok output\n{pretty}") + return j + else: + log.debug("_json_asok output empty") + return None + + def is_addr_blocklisted(self, addr): + blocklist = json.loads(self.mon_manager.raw_cluster_cmd( + "osd", "dump", "--format=json"))['blocklist'] + if addr in blocklist: + return True + log.warn(f'The address {addr} is not blocklisted') + return False + + +class MDSCluster(CephCluster): + """ + Collective operations on all the MDS daemons in the Ceph cluster. These + daemons may be in use by various Filesystems. + + For the benefit of pre-multi-filesystem tests, this class is also + a parent of Filesystem. The correct way to use MDSCluster going forward is + as a separate instance outside of your (multiple) Filesystem instances. + """ + + def __init__(self, ctx): + super(MDSCluster, self).__init__(ctx) + + @property + def mds_ids(self): + # do this dynamically because the list of ids may change periodically with cephadm + return list(misc.all_roles_of_type(self._ctx.cluster, 'mds')) + + @property + def mds_daemons(self): + return dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids]) + + def _one_or_all(self, mds_id, cb, in_parallel=True): + """ + Call a callback for a single named MDS, or for all. + + Note that the parallelism here isn't for performance, it's to avoid being overly kind + to the cluster by waiting a graceful ssh-latency of time between doing things, and to + avoid being overly kind by executing them in a particular order. 
However, some actions + don't cope with being done in parallel, so it's optional (`in_parallel`) + + :param mds_id: MDS daemon name, or None + :param cb: Callback taking single argument of MDS daemon name + :param in_parallel: whether to invoke callbacks concurrently (else one after the other) + """ + + if mds_id is None: + if in_parallel: + with parallel() as p: + for mds_id in self.mds_ids: + p.spawn(cb, mds_id) + else: + for mds_id in self.mds_ids: + cb(mds_id) + else: + cb(mds_id) + + def get_config(self, key, service_type=None): + """ + get_config specialization of service_type="mds" + """ + if service_type != "mds": + return super(MDSCluster, self).get_config(key, service_type) + + # Some tests stop MDS daemons, don't send commands to a dead one: + running_daemons = [i for i, mds in self.mds_daemons.items() if mds.running()] + service_id = random.sample(running_daemons, 1)[0] + return self.json_asok(['config', 'get', key], service_type, service_id)[key] + + def mds_stop(self, mds_id=None): + """ + Stop the MDS daemon process(se). If it held a rank, that rank + will eventually go laggy. + """ + self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop()) + + def mds_fail(self, mds_id=None): + """ + Inform MDSMonitor of the death of the daemon process(es). If it held + a rank, that rank will be relinquished. + """ + self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_)) + + def mds_restart(self, mds_id=None): + self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart()) + + def mds_fail_restart(self, mds_id=None): + """ + Variation on restart that includes marking MDSs as failed, so that doing this + operation followed by waiting for healthy daemon states guarantees that they + have gone down and come up, rather than potentially seeing the healthy states + that existed before the restart. + """ + def _fail_restart(id_): + self.mds_daemons[id_].stop() + self.mon_manager.raw_cluster_cmd("mds", "fail", id_) + self.mds_daemons[id_].restart() + + self._one_or_all(mds_id, _fail_restart) + + def mds_signal(self, mds_id, sig, silent=False): + """ + signal a MDS daemon + """ + self.mds_daemons[mds_id].signal(sig, silent); + + def mds_is_running(self, mds_id): + return self.mds_daemons[mds_id].running() + + def newfs(self, name='cephfs', create=True): + return Filesystem(self._ctx, name=name, create=create) + + def status(self, epoch=None): + return FSStatus(self.mon_manager, epoch) + + def get_standby_daemons(self): + return set([s['name'] for s in self.status().get_standbys()]) + + def get_mds_hostnames(self): + result = set() + for mds_id in self.mds_ids: + mds_remote = self.mon_manager.find_remote('mds', mds_id) + result.add(mds_remote.hostname) + + return list(result) + + def set_clients_block(self, blocked, mds_id=None): + """ + Block (using iptables) client communications to this MDS. Be careful: if + other services are running on this MDS, or other MDSs try to talk to this + MDS, their communications may also be blocked as collatoral damage. 
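        For example, for an MDS whose reported addr were "10.0.0.5:6807/12345",
        blocking would run roughly

            sudo iptables -A OUTPUT -p tcp --sport 6807 -j REJECT -m comment --comment teuthology
            sudo iptables -A INPUT -p tcp --dport 6807 -j REJECT -m comment --comment teuthology

        on that host, and unblocking repeats the same rules with -D.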
+ + :param mds_id: Optional ID of MDS to block, default to all + :return: + """ + da_flag = "-A" if blocked else "-D" + + def set_block(_mds_id): + remote = self.mon_manager.find_remote('mds', _mds_id) + status = self.status() + + addr = status.get_mds_addr(_mds_id) + ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups() + + remote.run( + args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"]) + remote.run( + args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"]) + + self._one_or_all(mds_id, set_block, in_parallel=False) + + def set_inter_mds_block(self, blocked, mds_rank_1, mds_rank_2): + """ + Block (using iptables) communications from a provided MDS to other MDSs. + Block all ports that an MDS uses for communication. + + :param blocked: True to block the MDS, False otherwise + :param mds_rank_1: MDS rank + :param mds_rank_2: MDS rank + :return: + """ + da_flag = "-A" if blocked else "-D" + + def set_block(mds_ids): + status = self.status() + + mds = mds_ids[0] + remote = self.mon_manager.find_remote('mds', mds) + addrs = status.get_mds_addrs(mds) + for addr in addrs: + ip_str, port_str = re.match("(.+):(.+)", addr).groups() + remote.run( + args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"], omit_sudo=False) + + + mds = mds_ids[1] + remote = self.mon_manager.find_remote('mds', mds) + addrs = status.get_mds_addrs(mds) + for addr in addrs: + ip_str, port_str = re.match("(.+):(.+)", addr).groups() + remote.run( + args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"], omit_sudo=False) + remote.run( + args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"], omit_sudo=False) + + self._one_or_all((mds_rank_1, mds_rank_2), set_block, in_parallel=False) + + def clear_firewall(self): + clear_firewall(self._ctx) + + def get_mds_info(self, mds_id): + return FSStatus(self.mon_manager).get_mds(mds_id) + + def is_pool_full(self, pool_name): + pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] + for pool in pools: + if pool['pool_name'] == pool_name: + return 'full' in pool['flags_names'].split(",") + + raise RuntimeError("Pool not found '{0}'".format(pool_name)) + + def delete_all_filesystems(self): + """ + Remove all filesystems that exist, and any pools in use by them. + """ + for fs in self.status().get_filesystems(): + Filesystem(ctx=self._ctx, fscid=fs['id']).destroy() + + @property + def beacon_timeout(self): + """ + Generate an acceptable timeout for the mons to drive some MDSMap change + because of missed beacons from some MDS. This involves looking up the + grace period in use by the mons and adding an acceptable buffer. + """ + + grace = float(self.get_config("mds_beacon_grace", service_type="mon")) + return grace*2+15 + + +class Filesystem(MDSCluster): + + """ + Generator for all Filesystems in the cluster. + """ + @classmethod + def get_all_fs(cls, ctx): + mdsc = MDSCluster(ctx) + status = mdsc.status() + for fs in status.get_filesystems(): + yield cls(ctx, fscid=fs['id']) + + """ + This object is for driving a CephFS filesystem. 
The MDS daemons driven by + MDSCluster may be shared with other Filesystems. + """ + def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False): + super(Filesystem, self).__init__(ctx) + + self.name = name + self.id = None + self.metadata_pool_name = None + self.data_pool_name = None + self.data_pools = None + self.fs_config = fs_config + self.ec_profile = fs_config.get('ec_profile') + + client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client')) + self.client_id = client_list[0] + self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1] + + if name is not None: + if fscid is not None: + raise RuntimeError("cannot specify fscid when creating fs") + if create and not self.legacy_configured(): + self.create() + else: + if fscid is not None: + self.id = fscid + self.getinfo(refresh = True) + + # Stash a reference to the first created filesystem on ctx, so + # that if someone drops to the interactive shell they can easily + # poke our methods. + if not hasattr(self._ctx, "filesystem"): + self._ctx.filesystem = self + + def dead(self): + try: + return not bool(self.get_mds_map()) + except FSMissing: + return True + + def get_task_status(self, status_key): + return self.mon_manager.get_service_task_status("mds", status_key) + + def getinfo(self, refresh = False): + status = self.status() + if self.id is not None: + fsmap = status.get_fsmap(self.id) + elif self.name is not None: + fsmap = status.get_fsmap_byname(self.name) + else: + fss = [fs for fs in status.get_filesystems()] + if len(fss) == 1: + fsmap = fss[0] + elif len(fss) == 0: + raise RuntimeError("no file system available") + else: + raise RuntimeError("more than one file system available") + self.id = fsmap['id'] + self.name = fsmap['mdsmap']['fs_name'] + self.get_pool_names(status = status, refresh = refresh) + return status + + def reach_max_mds(self): + status = self.wait_for_daemons() + mds_map = self.get_mds_map(status=status) + assert(mds_map['in'] == list(range(0, mds_map['max_mds']))) + + def reset(self): + self.mon_manager.raw_cluster_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it') + + def fail(self): + self.mon_manager.raw_cluster_cmd("fs", "fail", str(self.name)) + + def set_flag(self, var, *args): + a = map(lambda x: str(x).lower(), args) + self.mon_manager.raw_cluster_cmd("fs", "flag", "set", var, *a) + + def set_allow_multifs(self, yes=True): + self.set_flag("enable_multiple", yes) + + def set_var(self, var, *args): + a = map(lambda x: str(x).lower(), args) + self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a) + + def set_down(self, down=True): + self.set_var("down", str(down).lower()) + + def set_joinable(self, joinable=True): + self.set_var("joinable", joinable) + + def set_max_mds(self, max_mds): + self.set_var("max_mds", "%d" % max_mds) + + def set_session_timeout(self, timeout): + self.set_var("session_timeout", "%d" % timeout) + + def set_allow_standby_replay(self, yes): + self.set_var("allow_standby_replay", yes) + + def set_allow_new_snaps(self, yes): + self.set_var("allow_new_snaps", yes, '--yes-i-really-mean-it') + + def set_bal_rank_mask(self, bal_rank_mask): + self.set_var("bal_rank_mask", bal_rank_mask) + + def set_refuse_client_session(self, yes): + self.set_var("refuse_client_session", yes) + + def compat(self, *args): + a = map(lambda x: str(x).lower(), args) + self.mon_manager.raw_cluster_cmd("fs", "compat", self.name, *a) + + def add_compat(self, *args): + self.compat("add_compat", *args) + + def 
add_incompat(self, *args): + self.compat("add_incompat", *args) + + def rm_compat(self, *args): + self.compat("rm_compat", *args) + + def rm_incompat(self, *args): + self.compat("rm_incompat", *args) + + def required_client_features(self, *args, **kwargs): + c = ["fs", "required_client_features", self.name, *args] + return self.mon_manager.run_cluster_cmd(args=c, **kwargs) + + # Since v15.1.0 the pg autoscale mode has been enabled as default, + # will let the pg autoscale mode to calculate the pg_num as needed. + # We set the pg_num_min to 64 to make sure that pg autoscale mode + # won't set the pg_num to low to fix Tracker#45434. + pg_num = 64 + pg_num_min = 64 + target_size_ratio = 0.9 + target_size_ratio_ec = 0.9 + + def create(self, recover=False, metadata_overlay=False): + if self.name is None: + self.name = "cephfs" + if self.metadata_pool_name is None: + self.metadata_pool_name = "{0}_metadata".format(self.name) + if self.data_pool_name is None: + data_pool_name = "{0}_data".format(self.name) + else: + data_pool_name = self.data_pool_name + + # will use the ec pool to store the data and a small amount of + # metadata still goes to the primary data pool for all files. + if not metadata_overlay and self.ec_profile and 'disabled' not in self.ec_profile: + self.target_size_ratio = 0.05 + + log.debug("Creating filesystem '{0}'".format(self.name)) + + try: + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.metadata_pool_name, + '--pg_num_min', str(self.pg_num_min)) + + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', + str(self.target_size_ratio)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.metadata_pool_name, + str(self.pg_num_min)) + + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + str(self.pg_num_min)) + else: + raise + + args = ["fs", "new", self.name, self.metadata_pool_name, data_pool_name] + if recover: + args.append('--recover') + if metadata_overlay: + args.append('--allow-dangerous-metadata-overlay') + self.mon_manager.raw_cluster_cmd(*args) + + if not recover: + if self.ec_profile and 'disabled' not in self.ec_profile: + ec_data_pool_name = data_pool_name + "_ec" + log.debug("EC profile is %s", self.ec_profile) + cmd = ['osd', 'erasure-code-profile', 'set', ec_data_pool_name] + cmd.extend(self.ec_profile) + self.mon_manager.raw_cluster_cmd(*cmd) + try: + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'create', ec_data_pool_name, + 'erasure', ec_data_pool_name, + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', str(self.target_size_ratio_ec)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'create', ec_data_pool_name, + str(self.pg_num_min), 'erasure', ec_data_pool_name) + else: + raise + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'set', + ec_data_pool_name, 'allow_ec_overwrites', 'true') + self.add_data_pool(ec_data_pool_name, create=False) + self.check_pool_application(ec_data_pool_name) + + self.run_client_payload(f"setfattr -n ceph.dir.layout.pool -v {ec_data_pool_name} . 
&& getfattr -n ceph.dir.layout .") + + self.check_pool_application(self.metadata_pool_name) + self.check_pool_application(data_pool_name) + + # Turn off spurious standby count warnings from modifying max_mds in tests. + try: + self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0') + except CommandFailedError as e: + if e.exitstatus == 22: + # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise) + pass + else: + raise + + if self.fs_config is not None: + log.debug(f"fs_config: {self.fs_config}") + max_mds = self.fs_config.get('max_mds', 1) + if max_mds > 1: + self.set_max_mds(max_mds) + + standby_replay = self.fs_config.get('standby_replay', False) + self.set_allow_standby_replay(standby_replay) + + # If absent will use the default value (60 seconds) + session_timeout = self.fs_config.get('session_timeout', 60) + if session_timeout != 60: + self.set_session_timeout(session_timeout) + + if self.fs_config.get('subvols', None) is not None: + log.debug(f"Creating {self.fs_config.get('subvols')} subvols " + f"for filesystem '{self.name}'") + if not hasattr(self._ctx, "created_subvols"): + self._ctx.created_subvols = dict() + + subvols = self.fs_config.get('subvols') + assert(isinstance(subvols, dict)) + assert(isinstance(subvols['create'], int)) + assert(subvols['create'] > 0) + + for sv in range(0, subvols['create']): + sv_name = f'sv_{sv}' + self.mon_manager.raw_cluster_cmd( + 'fs', 'subvolume', 'create', self.name, sv_name, + self.fs_config.get('subvol_options', '')) + + if self.name not in self._ctx.created_subvols: + self._ctx.created_subvols[self.name] = [] + + subvol_path = self.mon_manager.raw_cluster_cmd( + 'fs', 'subvolume', 'getpath', self.name, sv_name) + subvol_path = subvol_path.strip() + self._ctx.created_subvols[self.name].append(subvol_path) + else: + log.debug(f"Not Creating any subvols for filesystem '{self.name}'") + + + self.getinfo(refresh = True) + + # wait pgs to be clean + self.mon_manager.wait_for_clean() + + def run_client_payload(self, cmd): + # avoid circular dep by importing here: + from tasks.cephfs.fuse_mount import FuseMount + + # Wait for at MDS daemons to be ready before mounting the + # ceph-fuse client in run_client_payload() + self.wait_for_daemons() + + d = misc.get_testdir(self._ctx) + m = FuseMount(self._ctx, d, "admin", self.client_remote, cephfs_name=self.name) + m.mount_wait() + m.run_shell_payload(cmd) + m.umount_wait(require_clean=True) + + def _remove_pool(self, name, **kwargs): + c = f'osd pool rm {name} {name} --yes-i-really-really-mean-it' + return self.mon_manager.ceph(c, **kwargs) + + def rm(self, **kwargs): + c = f'fs rm {self.name} --yes-i-really-mean-it' + return self.mon_manager.ceph(c, **kwargs) + + def remove_pools(self, data_pools): + self._remove_pool(self.get_metadata_pool_name()) + for poolname in data_pools: + try: + self._remove_pool(poolname) + except CommandFailedError as e: + # EBUSY, this data pool is used by two metadata pools, let the + # 2nd pass delete it + if e.exitstatus == EBUSY: + pass + else: + raise + + def destroy(self, reset_obj_attrs=True): + log.info(f'Destroying file system {self.name} and related pools') + + if self.dead(): + log.debug('already dead...') + return + + data_pools = self.get_data_pool_names(refresh=True) + + # make sure no MDSs are attached to given FS. 
+ self.fail() + self.rm() + + self.remove_pools(data_pools) + + if reset_obj_attrs: + self.id = None + self.name = None + self.metadata_pool_name = None + self.data_pool_name = None + self.data_pools = None + + def recreate(self): + self.destroy() + + self.create() + self.getinfo(refresh=True) + + def check_pool_application(self, pool_name): + osd_map = self.mon_manager.get_osd_dump_json() + for pool in osd_map['pools']: + if pool['pool_name'] == pool_name: + if "application_metadata" in pool: + if not "cephfs" in pool['application_metadata']: + raise RuntimeError("Pool {pool_name} does not name cephfs as application!".\ + format(pool_name=pool_name)) + + def __del__(self): + if getattr(self._ctx, "filesystem", None) == self: + delattr(self._ctx, "filesystem") + + def exists(self): + """ + Whether a filesystem exists in the mon's filesystem list + """ + fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty')) + return self.name in [fs['name'] for fs in fs_list] + + def legacy_configured(self): + """ + Check if a legacy (i.e. pre "fs new") filesystem configuration is present. If this is + the case, the caller should avoid using Filesystem.create + """ + try: + out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools') + pools = json.loads(out_text) + metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools] + if metadata_pool_exists: + self.metadata_pool_name = 'metadata' + except CommandFailedError as e: + # For use in upgrade tests, Ceph cuttlefish and earlier don't support + # structured output (--format) from the CLI. + if e.exitstatus == 22: + metadata_pool_exists = True + else: + raise + + return metadata_pool_exists + + def _df(self): + return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")) + + # may raise FSMissing + def get_mds_map(self, status=None): + if status is None: + status = self.status() + return status.get_fsmap(self.id)['mdsmap'] + + def get_var(self, var, status=None): + return self.get_mds_map(status=status)[var] + + def set_dir_layout(self, mount, path, layout): + for name, value in layout.items(): + mount.run_shell(args=["setfattr", "-n", "ceph.dir.layout."+name, "-v", str(value), path]) + + def add_data_pool(self, name, create=True): + if create: + try: + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, + '--pg_num_min', str(self.pg_num_min)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, + str(self.pg_num_min)) + else: + raise + self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name) + self.get_pool_names(refresh = True) + for poolid, fs_name in self.data_pools.items(): + if name == fs_name: + return poolid + raise RuntimeError("could not get just created pool '{0}'".format(name)) + + def get_pool_names(self, refresh = False, status = None): + if refresh or self.metadata_pool_name is None or self.data_pools is None: + if status is None: + status = self.status() + fsmap = status.get_fsmap(self.id) + + osd_map = self.mon_manager.get_osd_dump_json() + id_to_name = {} + for p in osd_map['pools']: + id_to_name[p['pool']] = p['pool_name'] + + self.metadata_pool_name = id_to_name[fsmap['mdsmap']['metadata_pool']] + self.data_pools = {} + for data_pool in fsmap['mdsmap']['data_pools']: + self.data_pools[data_pool] = id_to_name[data_pool] + + def get_data_pool_name(self, refresh = False): + if refresh or self.data_pools is 
None: + self.get_pool_names(refresh = True) + assert(len(self.data_pools) == 1) + return next(iter(self.data_pools.values())) + + def get_data_pool_id(self, refresh = False): + """ + Don't call this if you have multiple data pools + :return: integer + """ + if refresh or self.data_pools is None: + self.get_pool_names(refresh = True) + assert(len(self.data_pools) == 1) + return next(iter(self.data_pools.keys())) + + def get_data_pool_names(self, refresh = False): + if refresh or self.data_pools is None: + self.get_pool_names(refresh = True) + return list(self.data_pools.values()) + + def get_metadata_pool_name(self): + return self.metadata_pool_name + + def set_data_pool_name(self, name): + if self.id is not None: + raise RuntimeError("can't set filesystem name if its fscid is set") + self.data_pool_name = name + + def get_pool_pg_num(self, pool_name): + pgs = json.loads(self.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', + pool_name, 'pg_num', + '--format=json-pretty')) + return int(pgs['pg_num']) + + def get_namespace_id(self): + return self.id + + def get_pool_df(self, pool_name): + """ + Return a dict like: + {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0} + """ + for pool_df in self._df()['pools']: + if pool_df['name'] == pool_name: + return pool_df['stats'] + + raise RuntimeError("Pool name '{0}' not found".format(pool_name)) + + def get_usage(self): + return self._df()['stats']['total_used_bytes'] + + def are_daemons_healthy(self, status=None, skip_max_mds_check=False): + """ + Return true if all daemons are in one of active, standby, standby-replay, and + at least max_mds daemons are in 'active'. + + Unlike most of Filesystem, this function is tolerant of new-style `fs` + commands being missing, because we are part of the ceph installation + process during upgrade suites, so must fall back to old style commands + when we get an EINVAL on a new style command. 
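        In practice this check is usually polled via wait_for_daemons(), which
        retries once per second up to DAEMON_WAIT_TIMEOUT (120 seconds by
        default).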
+ + :return: + """ + # First, check to see that processes haven't exited with an error code + for mds in self._ctx.daemons.iter_daemons_of_role('mds'): + mds.check_status() + + active_count = 0 + mds_map = self.get_mds_map(status=status) + + log.debug("are_daemons_healthy: mds map: {0}".format(mds_map)) + + for mds_id, mds_status in mds_map['info'].items(): + if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]: + log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state'])) + return False + elif mds_status['state'] == 'up:active': + active_count += 1 + + log.debug("are_daemons_healthy: {0}/{1}".format( + active_count, mds_map['max_mds'] + )) + + if not skip_max_mds_check: + if active_count > mds_map['max_mds']: + log.debug("are_daemons_healthy: number of actives is greater than max_mds: {0}".format(mds_map)) + return False + elif active_count == mds_map['max_mds']: + # The MDSMap says these guys are active, but let's check they really are + for mds_id, mds_status in mds_map['info'].items(): + if mds_status['state'] == 'up:active': + try: + daemon_status = self.mds_tell(["status"], mds_id=mds_status['name']) + except CommandFailedError as cfe: + if cfe.exitstatus == errno.EINVAL: + # Old version, can't do this check + continue + else: + # MDS not even running + return False + + if daemon_status['state'] != 'up:active': + # MDS hasn't taken the latest map yet + return False + + return True + else: + return False + else: + log.debug("are_daemons_healthy: skipping max_mds check") + return True + + def get_daemon_names(self, state=None, status=None): + """ + Return MDS daemon names of those daemons in the given state + :param state: + :return: + """ + mdsmap = self.get_mds_map(status) + result = [] + for mds_status in sorted(mdsmap['info'].values(), + key=lambda _: _['rank']): + if mds_status['state'] == state or state is None: + result.append(mds_status['name']) + + return result + + def get_active_names(self, status=None): + """ + Return MDS daemon names of those daemons holding ranks + in state up:active + + :return: list of strings like ['a', 'b'], sorted by rank + """ + return self.get_daemon_names("up:active", status=status) + + def get_all_mds_rank(self, status=None): + mdsmap = self.get_mds_map(status) + result = [] + for mds_status in sorted(mdsmap['info'].values(), + key=lambda _: _['rank']): + if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay': + result.append(mds_status['rank']) + + return result + + def get_rank(self, rank=None, status=None): + if status is None: + status = self.getinfo() + if rank is None: + rank = 0 + return status.get_rank(self.id, rank) + + def rank_restart(self, rank=0, status=None): + name = self.get_rank(rank=rank, status=status)['name'] + self.mds_restart(mds_id=name) + + def rank_signal(self, signal, rank=0, status=None): + name = self.get_rank(rank=rank, status=status)['name'] + self.mds_signal(name, signal) + + def rank_freeze(self, yes, rank=0): + self.mon_manager.raw_cluster_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower()) + + def rank_repaired(self, rank): + self.mon_manager.raw_cluster_cmd("mds", "repaired", "{}:{}".format(self.id, rank)) + + def rank_fail(self, rank=0): + self.mon_manager.raw_cluster_cmd("mds", "fail", "{}:{}".format(self.id, rank)) + + def rank_is_running(self, rank=0, status=None): + name = self.get_rank(rank=rank, status=status)['name'] + return self.mds_is_running(name) + + def get_ranks(self, status=None): + if status is None: + status = 
self.getinfo() + return status.get_ranks(self.id) + + def get_damaged(self, status=None): + if status is None: + status = self.getinfo() + return status.get_damaged(self.id) + + def get_replays(self, status=None): + if status is None: + status = self.getinfo() + return status.get_replays(self.id) + + def get_replay(self, rank=0, status=None): + for replay in self.get_replays(status=status): + if replay['rank'] == rank: + return replay + return None + + def get_rank_names(self, status=None): + """ + Return MDS daemon names of those daemons holding a rank, + sorted by rank. This includes e.g. up:replay/reconnect + as well as active, but does not include standby or + standby-replay. + """ + mdsmap = self.get_mds_map(status) + result = [] + for mds_status in sorted(mdsmap['info'].values(), + key=lambda _: _['rank']): + if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay': + result.append(mds_status['name']) + + return result + + def wait_for_daemons(self, timeout=None, skip_max_mds_check=False, status=None): + """ + Wait until all daemons are healthy + :return: + """ + + if timeout is None: + timeout = DAEMON_WAIT_TIMEOUT + + if self.id is None: + status = self.getinfo(refresh=True) + + if status is None: + status = self.status() + + elapsed = 0 + while True: + if self.are_daemons_healthy(status=status, skip_max_mds_check=skip_max_mds_check): + return status + else: + time.sleep(1) + elapsed += 1 + + if elapsed > timeout: + log.debug("status = {0}".format(status)) + raise RuntimeError("Timed out waiting for MDS daemons to become healthy") + + status = self.status() + + def dencoder(self, obj_type, obj_blob): + args = [os.path.join(self._prefix, "ceph-dencoder"), 'type', obj_type, 'import', '-', 'decode', 'dump_json'] + p = self.mon_manager.controller.run(args=args, stdin=BytesIO(obj_blob), stdout=BytesIO()) + return p.stdout.getvalue() + + def rados(self, *args, **kwargs): + """ + Callout to rados CLI. + """ + + return self.mon_manager.do_rados(*args, **kwargs) + + def radosm(self, *args, **kwargs): + """ + Interact with the metadata pool via rados CLI. + """ + + return self.rados(*args, **kwargs, pool=self.get_metadata_pool_name()) + + def radosmo(self, *args, stdout=BytesIO(), **kwargs): + """ + Interact with the metadata pool via rados CLI. Get the stdout. + """ + + return self.radosm(*args, **kwargs, stdout=stdout).stdout.getvalue() + + def get_metadata_object(self, object_type, object_id): + """ + Retrieve an object from the metadata pool, pass it through + ceph-dencoder to dump it to JSON, and return the decoded object. + """ + + o = self.radosmo(['get', object_id, '-']) + j = self.dencoder(object_type, o) + try: + return json.loads(j) + except (TypeError, ValueError): + log.error("Failed to decode JSON: '{0}'".format(j)) + raise + + def get_journal_version(self): + """ + Read the JournalPointer and Journal::Header objects to learn the version of + encoding in use. 
+ """ + journal_pointer_object = '400.00000000' + journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object) + journal_ino = journal_pointer_dump['journal_pointer']['front'] + + journal_header_object = "{0:x}.00000000".format(journal_ino) + journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object) + + version = journal_header_dump['journal_header']['stream_format'] + log.debug("Read journal version {0}".format(version)) + + return version + + def mds_asok(self, command, mds_id=None, timeout=None): + if mds_id is None: + return self.rank_asok(command, timeout=timeout) + + return self.json_asok(command, 'mds', mds_id, timeout=timeout) + + def mds_tell(self, command, mds_id=None): + if mds_id is None: + return self.rank_tell(command) + + return json.loads(self.mon_manager.raw_cluster_cmd("tell", f"mds.{mds_id}", *command)) + + def rank_asok(self, command, rank=0, status=None, timeout=None): + info = self.get_rank(rank=rank, status=status) + return self.json_asok(command, 'mds', info['name'], timeout=timeout) + + def rank_tell(self, command, rank=0, status=None): + try: + out = self.mon_manager.raw_cluster_cmd("tell", f"mds.{self.id}:{rank}", *command) + return json.loads(out) + except json.decoder.JSONDecodeError: + log.error("could not decode: {}".format(out)) + raise + + def ranks_tell(self, command, status=None): + if status is None: + status = self.status() + out = [] + for r in status.get_ranks(self.id): + result = self.rank_tell(command, rank=r['rank'], status=status) + out.append((r['rank'], result)) + return sorted(out) + + def ranks_perf(self, f, status=None): + perf = self.ranks_tell(["perf", "dump"], status=status) + out = [] + for rank, perf in perf: + out.append((rank, f(perf))) + return out + + def read_cache(self, path, depth=None, rank=None): + cmd = ["dump", "tree", path] + if depth is not None: + cmd.append(depth.__str__()) + result = self.rank_asok(cmd, rank=rank) + if result is None or len(result) == 0: + raise RuntimeError("Path not found in cache: {0}".format(path)) + + return result + + def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None, rank=None): + """ + Block until the MDS reaches a particular state, or a failure condition + is met. + + When there are multiple MDSs, succeed when exaclty one MDS is in the + goal state, or fail when any MDS is in the reject state. 
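        For example, assuming `fs` is a Filesystem instance, a call like

            fs.wait_for_state('up:active', rank=0, timeout=60)

        would block until rank 0 is active again, giving up after a minute.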
+ + :param goal_state: Return once the MDS is in this state + :param reject: Fail if the MDS enters this state before the goal state + :param timeout: Fail if this many seconds pass before reaching goal + :return: number of seconds waited, rounded down to integer + """ + + started_at = time.time() + while True: + status = self.status() + if rank is not None: + try: + mds_info = status.get_rank(self.id, rank) + current_state = mds_info['state'] if mds_info else None + log.debug("Looked up MDS state for mds.{0}: {1}".format(rank, current_state)) + except: + mdsmap = self.get_mds_map(status=status) + if rank in mdsmap['failed']: + log.debug("Waiting for rank {0} to come back.".format(rank)) + current_state = None + else: + raise + elif mds_id is not None: + # mds_info is None if no daemon with this ID exists in the map + mds_info = status.get_mds(mds_id) + current_state = mds_info['state'] if mds_info else None + log.debug("Looked up MDS state for {0}: {1}".format(mds_id, current_state)) + else: + # In general, look for a single MDS + states = [m['state'] for m in status.get_ranks(self.id)] + if [s for s in states if s == goal_state] == [goal_state]: + current_state = goal_state + elif reject in states: + current_state = reject + else: + current_state = None + log.debug("mapped states {0} to {1}".format(states, current_state)) + + elapsed = time.time() - started_at + if current_state == goal_state: + log.debug("reached state '{0}' in {1}s".format(current_state, elapsed)) + return elapsed + elif reject is not None and current_state == reject: + raise RuntimeError("MDS in reject state {0}".format(current_state)) + elif timeout is not None and elapsed > timeout: + log.error("MDS status at timeout: {0}".format(status.get_fsmap(self.id))) + raise RuntimeError( + "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format( + elapsed, goal_state, current_state + )) + else: + time.sleep(1) + + def _read_data_xattr(self, ino_no, xattr_name, obj_type, pool): + if pool is None: + pool = self.get_data_pool_name() + + obj_name = "{0:x}.00000000".format(ino_no) + + args = ["getxattr", obj_name, xattr_name] + try: + proc = self.rados(args, pool=pool, stdout=BytesIO()) + except CommandFailedError as e: + log.error(e.__str__()) + raise ObjectNotFound(obj_name) + + obj_blob = proc.stdout.getvalue() + return json.loads(self.dencoder(obj_type, obj_blob).strip()) + + def _write_data_xattr(self, ino_no, xattr_name, data, pool=None): + """ + Write to an xattr of the 0th data object of an inode. Will + succeed whether the object and/or xattr already exist or not. 
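+
+        The manual equivalent is roughly (pool name, inode number and value
+        are illustrative)::
+
+            rados -p cephfs_data setxattr 10000000002.00000000 parent <value>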
+ + :param ino_no: integer inode number + :param xattr_name: string name of the xattr + :param data: byte array data to write to the xattr + :param pool: name of data pool or None to use primary data pool + :return: None + """ + if pool is None: + pool = self.get_data_pool_name() + + obj_name = "{0:x}.00000000".format(ino_no) + args = ["setxattr", obj_name, xattr_name, data] + self.rados(args, pool=pool) + + def read_symlink(self, ino_no, pool=None): + return self._read_data_xattr(ino_no, "symlink", "string_wrapper", pool) + + def read_backtrace(self, ino_no, pool=None): + """ + Read the backtrace from the data pool, return a dict in the format + given by inode_backtrace_t::dump, which is something like: + + :: + + rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin + ceph-dencoder type inode_backtrace_t import out.bin decode dump_json + + { "ino": 1099511627778, + "ancestors": [ + { "dirino": 1, + "dname": "blah", + "version": 11}], + "pool": 1, + "old_pools": []} + + :param pool: name of pool to read backtrace from. If omitted, FS must have only + one data pool and that will be used. + """ + return self._read_data_xattr(ino_no, "parent", "inode_backtrace_t", pool) + + def read_layout(self, ino_no, pool=None): + """ + Read 'layout' xattr of an inode and parse the result, returning a dict like: + :: + { + "stripe_unit": 4194304, + "stripe_count": 1, + "object_size": 4194304, + "pool_id": 1, + "pool_ns": "", + } + + :param pool: name of pool to read backtrace from. If omitted, FS must have only + one data pool and that will be used. + """ + return self._read_data_xattr(ino_no, "layout", "file_layout_t", pool) + + def _enumerate_data_objects(self, ino, size): + """ + Get the list of expected data objects for a range, and the list of objects + that really exist. 
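+
+        As a worked example (inode number illustrative): with the default
+        4 MiB object size, an inode 0x10000000000 of size 9 MiB is expected
+        to map to three objects::
+
+            10000000000.00000000  10000000000.00000001  10000000000.00000002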
+ + :return a tuple of two lists of strings (expected, actual) + """ + stripe_size = 1024 * 1024 * 4 + + size = max(stripe_size, size) + + want_objects = [ + "{0:x}.{1:08x}".format(ino, n) + for n in range(0, ((size - 1) // stripe_size) + 1) + ] + + exist_objects = self.rados(["ls"], pool=self.get_data_pool_name(), stdout=StringIO()).stdout.getvalue().split("\n") + + return want_objects, exist_objects + + def data_objects_present(self, ino, size): + """ + Check that *all* the expected data objects for an inode are present in the data pool + """ + + want_objects, exist_objects = self._enumerate_data_objects(ino, size) + missing = set(want_objects) - set(exist_objects) + + if missing: + log.debug("Objects missing (ino {0}, size {1}): {2}".format( + ino, size, missing + )) + return False + else: + log.debug("All objects for ino {0} size {1} found".format(ino, size)) + return True + + def data_objects_absent(self, ino, size): + want_objects, exist_objects = self._enumerate_data_objects(ino, size) + present = set(want_objects) & set(exist_objects) + + if present: + log.debug("Objects not absent (ino {0}, size {1}): {2}".format( + ino, size, present + )) + return False + else: + log.debug("All objects for ino {0} size {1} are absent".format(ino, size)) + return True + + def dirfrag_exists(self, ino, frag): + try: + self.radosm(["stat", "{0:x}.{1:08x}".format(ino, frag)]) + except CommandFailedError: + return False + else: + return True + + def list_dirfrag(self, dir_ino): + """ + Read the named object and return the list of omap keys + + :return a list of 0 or more strings + """ + + dirfrag_obj_name = "{0:x}.00000000".format(dir_ino) + + try: + key_list_str = self.radosmo(["listomapkeys", dirfrag_obj_name], stdout=StringIO()) + except CommandFailedError as e: + log.error(e.__str__()) + raise ObjectNotFound(dirfrag_obj_name) + + return key_list_str.strip().split("\n") if key_list_str else [] + + def get_meta_of_fs_file(self, dir_ino, obj_name, out): + """ + get metadata from parent to verify the correctness of the data format encoded by the tool, cephfs-meta-injection. + warning : The splitting of directory is not considered here. + """ + + dirfrag_obj_name = "{0:x}.00000000".format(dir_ino) + try: + self.radosm(["getomapval", dirfrag_obj_name, obj_name+"_head", out]) + except CommandFailedError as e: + log.error(e.__str__()) + raise ObjectNotFound(dir_ino) + + def erase_metadata_objects(self, prefix): + """ + For all objects in the metadata pool matching the prefix, + erase them. + + This O(N) with the number of objects in the pool, so only suitable + for use on toy test filesystems. + """ + all_objects = self.radosmo(["ls"], stdout=StringIO()).strip().split("\n") + matching_objects = [o for o in all_objects if o.startswith(prefix)] + for o in matching_objects: + self.radosm(["rm", o]) + + def erase_mds_objects(self, rank): + """ + Erase all the per-MDS objects for a particular rank. This includes + inotable, sessiontable, journal + """ + + def obj_prefix(multiplier): + """ + MDS object naming conventions like rank 1's + journal is at 201.*** + """ + return "%x." 
% (multiplier * 0x100 + rank) + + # MDS_INO_LOG_OFFSET + self.erase_metadata_objects(obj_prefix(2)) + # MDS_INO_LOG_BACKUP_OFFSET + self.erase_metadata_objects(obj_prefix(3)) + # MDS_INO_LOG_POINTER_OFFSET + self.erase_metadata_objects(obj_prefix(4)) + # MDSTables & SessionMap + self.erase_metadata_objects("mds{rank:d}_".format(rank=rank)) + + @property + def _prefix(self): + """ + Override this to set a different + """ + return "" + + def _make_rank(self, rank): + return "{}:{}".format(self.name, rank) + + def _run_tool(self, tool, args, rank=None, quiet=False): + # Tests frequently have [client] configuration that jacks up + # the objecter log level (unlikely to be interesting here) + # and does not set the mds log level (very interesting here) + if quiet: + base_args = [os.path.join(self._prefix, tool), '--debug-mds=1', '--debug-objecter=1'] + else: + base_args = [os.path.join(self._prefix, tool), '--debug-mds=20', '--debug-ms=1', '--debug-objecter=1'] + + if rank is not None: + base_args.extend(["--rank", "%s" % str(rank)]) + + t1 = datetime.datetime.now() + r = self.tool_remote.sh(script=base_args + args, stdout=StringIO()).strip() + duration = datetime.datetime.now() - t1 + log.debug("Ran {0} in time {1}, result:\n{2}".format( + base_args + args, duration, r + )) + return r + + @property + def tool_remote(self): + """ + An arbitrary remote to use when invoking recovery tools. Use an MDS host because + it'll definitely have keys with perms to access cephfs metadata pool. This is public + so that tests can use this remote to go get locally written output files from the tools. + """ + return self.mon_manager.controller + + def journal_tool(self, args, rank, quiet=False): + """ + Invoke cephfs-journal-tool with the passed arguments for a rank, and return its stdout + """ + fs_rank = self._make_rank(rank) + return self._run_tool("cephfs-journal-tool", args, fs_rank, quiet) + + def meta_tool(self, args, rank, quiet=False): + """ + Invoke cephfs-meta-injection with the passed arguments for a rank, and return its stdout + """ + fs_rank = self._make_rank(rank) + return self._run_tool("cephfs-meta-injection", args, fs_rank, quiet) + + def table_tool(self, args, quiet=False): + """ + Invoke cephfs-table-tool with the passed arguments, and return its stdout + """ + return self._run_tool("cephfs-table-tool", args, None, quiet) + + def data_scan(self, args, quiet=False, worker_count=1): + """ + Invoke cephfs-data-scan with the passed arguments, and return its stdout + + :param worker_count: if greater than 1, multiple workers will be run + in parallel and the return value will be None + """ + + workers = [] + + for n in range(0, worker_count): + if worker_count > 1: + # data-scan args first token is a command, followed by args to it. + # insert worker arguments after the command. + cmd = args[0] + worker_args = [cmd] + ["--worker_n", n.__str__(), "--worker_m", worker_count.__str__()] + args[1:] + else: + worker_args = args + + workers.append(Greenlet.spawn(lambda wargs=worker_args: + self._run_tool("cephfs-data-scan", wargs, None, quiet))) + + for w in workers: + w.get() + + if worker_count == 1: + return workers[0].value + else: + return None + + def is_full(self): + return self.is_pool_full(self.get_data_pool_name()) + + def authorize(self, client_id, caps=('/', 'rw')): + """ + Run "ceph fs authorize" and run "ceph auth get" to get and returnt the + keyring. + + client_id: client id that will be authorized + caps: tuple containing the path and permission (can be r or rw) + respectively. 
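+
+        A usage sketch (client ids and paths are illustrative)::
+
+            fs.authorize('alice')                                # rw on /
+            fs.authorize('bob', (('/dir1', 'r'), ('/dir2', 'rw')))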
+ """ + if isinstance(caps[0], (tuple, list)): + x = [] + for c in caps: + x.extend(c) + caps = tuple(x) + + client_name = 'client.' + client_id + return self.mon_manager.raw_cluster_cmd('fs', 'authorize', self.name, + client_name, *caps) + + def grow(self, new_max_mds, status=None): + oldmax = self.get_var('max_mds', status=status) + assert(new_max_mds > oldmax) + self.set_max_mds(new_max_mds) + return self.wait_for_daemons() + + def shrink(self, new_max_mds, status=None): + oldmax = self.get_var('max_mds', status=status) + assert(new_max_mds < oldmax) + self.set_max_mds(new_max_mds) + return self.wait_for_daemons() + + def run_scrub(self, cmd, rank=0): + return self.rank_tell(["scrub"] + cmd, rank) + + def get_scrub_status(self, rank=0): + return self.run_scrub(["status"], rank) + + def flush(self, rank=0): + return self.rank_tell(["flush", "journal"], rank=rank) + + def wait_until_scrub_complete(self, result=None, tag=None, rank=0, sleep=30, + timeout=300, reverse=False): + # time out after "timeout" seconds and assume as done + if result is None: + result = "no active scrubs running" + with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed: + while proceed(): + out_json = self.rank_tell(["scrub", "status"], rank=rank) + assert out_json is not None + if not reverse: + if result in out_json['status']: + log.info("all active scrubs completed") + return True + else: + if result not in out_json['status']: + log.info("all active scrubs completed") + return True + + if tag is not None: + status = out_json['scrubs'][tag] + if status is not None: + log.info(f"scrub status for tag:{tag} - {status}") + else: + log.info(f"scrub has completed for tag:{tag}") + return True + + # timed out waiting for scrub to complete + return False + + def get_damage(self, rank=None): + if rank is None: + result = {} + for info in self.get_ranks(): + rank = info['rank'] + result[rank] = self.get_damage(rank=rank) + return result + else: + return self.rank_tell(['damage', 'ls'], rank=rank) diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py new file mode 100644 index 000000000..0b9b17403 --- /dev/null +++ b/qa/tasks/cephfs/fuse_mount.py @@ -0,0 +1,533 @@ +import json +import time +import logging + +from io import StringIO +from textwrap import dedent + +from teuthology.contextutil import MaxWhileTries +from teuthology.contextutil import safe_while +from teuthology.orchestra import run +from teuthology.exceptions import CommandFailedError +from tasks.ceph_manager import get_valgrind_args +from tasks.cephfs.mount import CephFSMount, UMOUNT_TIMEOUT + +log = logging.getLogger(__name__) + +# Refer mount.py for docstrings. 
+class FuseMount(CephFSMount): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, cephfs_name=None, + cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None, + client_config={}): + super(FuseMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_remote=client_remote, + client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet, + client_config=client_config) + + self.fuse_daemon = None + self._fuse_conn = None + self.id = None + self.inst = None + self.addr = None + self.mount_timeout = int(self.client_config.get('mount_timeout', 30)) + + self._mount_bin = [ + 'ceph-fuse', "-f", + "--admin-socket", "/var/run/ceph/$cluster-$name.$pid.asok"] + self._mount_cmd_cwd = self.test_dir + if self.client_config.get('valgrind') is not None: + self.cwd = None # get_valgrind_args chdir for us + self._mount_cmd_logger = log.getChild('ceph-fuse.{id}'.format(id=self.client_id)) + self._mount_cmd_stdin = run.PIPE + + def mount(self, mntopts=None, check_status=True, mntargs=None, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + self.setup_netns() + + try: + return self._mount(mntopts, mntargs, check_status) + except RuntimeError: + # Catch exceptions by the mount() logic (i.e. not remote command + # failures) and ensure the mount is not left half-up. + # Otherwise we might leave a zombie mount point that causes + # anyone traversing cephtest/ to get hung up on. + log.warning("Trying to clean up after failed mount") + self.umount_wait(force=True) + raise + + def _mount(self, mntopts, mntargs, check_status): + log.info("Client client.%s config is %s" % (self.client_id, + self.client_config)) + + self._create_mntpt() + + retval = self._run_mount_cmd(mntopts, mntargs, check_status) + if retval: + return retval + + self.gather_mount_info() + + def _run_mount_cmd(self, mntopts, mntargs, check_status): + mount_cmd = self._get_mount_cmd(mntopts, mntargs) + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + + # Before starting ceph-fuse process, note the contents of + # /sys/fs/fuse/connections + pre_mount_conns = self._list_fuse_conns() + log.info("Pre-mount connections: {0}".format(pre_mount_conns)) + + self.fuse_daemon = self.client_remote.run( + args=mount_cmd, + cwd=self._mount_cmd_cwd, + logger=self._mount_cmd_logger, + stdin=self._mount_cmd_stdin, + stdout=mountcmd_stdout, + stderr=mountcmd_stderr, + wait=False + ) + + return self._wait_and_record_our_fuse_conn( + check_status, pre_mount_conns, mountcmd_stdout, mountcmd_stderr) + + def _get_mount_cmd(self, mntopts, mntargs): + daemon_signal = 'kill' + if self.client_config.get('coverage') or \ + self.client_config.get('valgrind') is not None: + daemon_signal = 'term' + + mount_cmd = ['sudo', 'adjust-ulimits', 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=self.test_dir), + 'daemon-helper', daemon_signal] + + mount_cmd = self._add_valgrind_args(mount_cmd) + mount_cmd = ['sudo'] + self._nsenter_args + mount_cmd + + mount_cmd += self._mount_bin + [self.hostfs_mntpt] + if self.client_id: + mount_cmd += ['--id', self.client_id] + if self.client_keyring_path and self.client_id: + mount_cmd += ['-k', self.client_keyring_path] + + self.validate_subvol_options() + + if self.cephfs_mntpt: + mount_cmd += ["--client_mountpoint=" + self.cephfs_mntpt] + + if self.cephfs_name: + mount_cmd += ["--client_fs=" + self.cephfs_name] + if mntopts: + mount_cmd.extend(('-o', 
','.join(mntopts))) + if mntargs: + mount_cmd.extend(mntargs) + + return mount_cmd + + def _add_valgrind_args(self, mount_cmd): + if self.client_config.get('valgrind') is not None: + mount_cmd = get_valgrind_args( + self.test_dir, + 'client.{id}'.format(id=self.client_id), + mount_cmd, + self.client_config.get('valgrind'), + cd=False + ) + + return mount_cmd + + def _list_fuse_conns(self): + conn_dir = "/sys/fs/fuse/connections" + + self.client_remote.run(args=['sudo', 'modprobe', 'fuse'], + check_status=False) + self.client_remote.run( + args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir], + check_status=False, timeout=(30)) + + try: + ls_str = self.client_remote.sh("ls " + conn_dir, + stdout=StringIO(), + timeout=300).strip() + except CommandFailedError: + return [] + + if ls_str: + return [int(n) for n in ls_str.split("\n")] + else: + return [] + + def _wait_and_record_our_fuse_conn(self, check_status, pre_mount_conns, + mountcmd_stdout, mountcmd_stderr): + """ + Wait for the connection reference to appear in /sys + """ + waited = 0 + + post_mount_conns = self._list_fuse_conns() + while len(post_mount_conns) <= len(pre_mount_conns): + if self.fuse_daemon.finished: + # Did mount fail? Raise the CommandFailedError instead of + # hitting the "failed to populate /sys/" timeout + try: + self.fuse_daemon.wait() + except CommandFailedError as e: + log.info('mount command failed.') + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + time.sleep(1) + waited += 1 + if waited > self._fuse_conn_check_timeout: + raise RuntimeError( + "Fuse mount failed to populate/sys/ after {} " + "seconds".format(waited)) + else: + post_mount_conns = self._list_fuse_conns() + + log.info("Post-mount connections: {0}".format(post_mount_conns)) + + self._record_our_fuse_conn(pre_mount_conns, post_mount_conns) + + @property + def _fuse_conn_check_timeout(self): + mount_wait = self.client_config.get('mount_wait', 0) + if mount_wait > 0: + log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait)) + time.sleep(mount_wait) + timeout = int(self.client_config.get('mount_timeout', 30)) + return timeout + + def _record_our_fuse_conn(self, pre_mount_conns, post_mount_conns): + """ + Record our fuse connection number so that we can use it when forcing + an unmount. 
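+
+        For example (numbers illustrative): if /sys/fs/fuse/connections held
+        {38, 40} before mounting and {38, 40, 41} afterwards, the new
+        connection is 41, and a later forced unmount can abort it by writing
+        '1' to /sys/fs/fuse/connections/41/abort.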
+ """ + new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) + if len(new_conns) == 0: + raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) + elif len(new_conns) > 1: + raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) + else: + self._fuse_conn = new_conns[0] + + def gather_mount_info(self): + status = self.admin_socket(['status']) + self.id = status['id'] + self.client_pid = status['metadata']['pid'] + try: + self.inst = status['inst_str'] + self.addr = status['addr_str'] + except KeyError: + sessions = self.fs.rank_asok(['session', 'ls']) + for s in sessions: + if s['id'] == self.id: + self.inst = s['inst'] + self.addr = self.inst.split()[1] + if self.inst is None: + raise RuntimeError("cannot find client session") + + def check_mounted_state(self): + proc = self.client_remote.run( + args=[ + 'stat', + '--file-system', + '--printf=%T\n', + '--', + self.hostfs_mntpt, + ], + stdout=StringIO(), + stderr=StringIO(), + wait=False, + timeout=300 + ) + try: + proc.wait() + except CommandFailedError: + error = proc.stderr.getvalue() + if ("endpoint is not connected" in error + or "Software caused connection abort" in error): + # This happens is fuse is killed without unmount + log.warning("Found stale mount point at {0}".format(self.hostfs_mntpt)) + return True + else: + # This happens if the mount directory doesn't exist + log.info('mount point does not exist: %s', self.hostfs_mntpt) + return False + + fstype = proc.stdout.getvalue().rstrip('\n') + if fstype == 'fuseblk': + log.info('ceph-fuse is mounted on %s', self.hostfs_mntpt) + return True + else: + log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format( + fstype=fstype)) + return False + + def wait_until_mounted(self): + """ + Check to make sure that fuse is mounted on mountpoint. If not, + sleep for 5 seconds and check again. + """ + + while not self.check_mounted_state(): + # Even if it's not mounted, it should at least + # be running: catch simple failures where it has terminated. + assert not self.fuse_daemon.poll() + + time.sleep(5) + + # Now that we're mounted, set permissions so that the rest of the test + # will have unrestricted access to the filesystem mount. + for retry in range(10): + try: + stderr = StringIO() + self.client_remote.run(args=['sudo', 'chmod', '1777', + self.hostfs_mntpt], + timeout=300, + stderr=stderr, omit_sudo=False) + break + except run.CommandFailedError: + stderr = stderr.getvalue().lower() + if "read-only file system" in stderr: + break + elif "permission denied" in stderr: + time.sleep(5) + else: + raise + + def _mountpoint_exists(self): + return self.client_remote.run(args=["ls", "-d", self.hostfs_mntpt], + check_status=False, + timeout=300).exitstatus == 0 + + def umount(self, cleanup=True): + """ + umount() must not run cleanup() when it's called by umount_wait() + since "run.wait([self.fuse_daemon], timeout)" would hang otherwise. 
+ """ + if not self.is_mounted(): + if cleanup: + self.cleanup() + return + if self.is_blocked(): + self._run_umount_lf() + if cleanup: + self.cleanup() + return + + try: + log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name)) + stderr = StringIO() + self.client_remote.run( + args=['sudo', 'fusermount', '-u', self.hostfs_mntpt], + stderr=stderr, timeout=UMOUNT_TIMEOUT, omit_sudo=False) + except run.CommandFailedError: + if "mountpoint not found" in stderr.getvalue(): + # This happens if the mount directory doesn't exist + log.info('mount point does not exist: %s', self.mountpoint) + elif "not mounted" in stderr.getvalue(): + # This happens if the mount directory already unmouted + log.info('mount point not mounted: %s', self.mountpoint) + else: + log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) + + self.client_remote.run( + args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', + run.Raw(';'), 'ps', 'auxf'], + timeout=UMOUNT_TIMEOUT, omit_sudo=False) + + # abort the fuse mount, killing all hung processes + if self._fuse_conn: + self.run_python(dedent(""" + import os + path = "/sys/fs/fuse/connections/{0}/abort" + if os.path.exists(path): + open(path, "w").write("1") + """).format(self._fuse_conn)) + self._fuse_conn = None + + # make sure its unmounted + self._run_umount_lf() + + self._fuse_conn = None + self.id = None + self.inst = None + self.addr = None + if cleanup: + self.cleanup() + + def umount_wait(self, force=False, require_clean=False, + timeout=UMOUNT_TIMEOUT): + """ + :param force: Complete cleanly even if the MDS is offline + """ + if not (self.is_mounted() and self.fuse_daemon): + log.debug('ceph-fuse client.{id} is not mounted at {remote} ' + '{mnt}'.format(id=self.client_id, + remote=self.client_remote, + mnt=self.hostfs_mntpt)) + self.cleanup() + return + + if force: + assert not require_clean # mutually exclusive + + # When we expect to be forcing, kill the ceph-fuse process directly. + # This should avoid hitting the more aggressive fallback killing + # in umount() which can affect other mounts too. + self.fuse_daemon.stdin.close() + + # However, we will still hit the aggressive wait if there is an ongoing + # mount -o remount (especially if the remount is stuck because MDSs + # are unavailable) + + if self.is_blocked(): + self._run_umount_lf() + self.cleanup() + return + + # cleanup is set to to fail since clieanup must happen after umount is + # complete; otherwise following call to run.wait hangs. + self.umount(cleanup=False) + + try: + # Permit a timeout, so that we do not block forever + run.wait([self.fuse_daemon], timeout) + + except MaxWhileTries: + log.error("process failed to terminate after unmount. This probably" + " indicates a bug within ceph-fuse.") + raise + except CommandFailedError: + if require_clean: + raise + + self.cleanup() + + def teardown(self): + """ + Whatever the state of the mount, get it gone. 
+ """ + super(FuseMount, self).teardown() + + self.umount() + + if self.fuse_daemon and not self.fuse_daemon.finished: + self.fuse_daemon.stdin.close() + try: + self.fuse_daemon.wait() + except CommandFailedError: + pass + + def _asok_path(self): + return "/var/run/ceph/ceph-client.{0}.*.asok".format(self.client_id) + + @property + def _prefix(self): + return "" + + def find_admin_socket(self): + pyscript = """ +import glob +import re +import os +import subprocess + +def _find_admin_socket(client_name): + asok_path = "{asok_path}" + files = glob.glob(asok_path) + mountpoint = "{mountpoint}" + + # Given a non-glob path, it better be there + if "*" not in asok_path: + assert(len(files) == 1) + return files[0] + + for f in files: + pid = re.match(".*\.(\d+)\.asok$", f).group(1) + if os.path.exists("/proc/{{0}}".format(pid)): + with open("/proc/{{0}}/cmdline".format(pid), 'r') as proc_f: + contents = proc_f.read() + if mountpoint in contents: + return f + raise RuntimeError("Client socket {{0}} not found".format(client_name)) + +print(_find_admin_socket("{client_name}")) +""".format( + asok_path=self._asok_path(), + client_name="client.{0}".format(self.client_id), + mountpoint=self.mountpoint) + + asok_path = self.run_python(pyscript, sudo=True) + log.info("Found client admin socket at {0}".format(asok_path)) + return asok_path + + def admin_socket(self, args): + asok_path = self.find_admin_socket() + + # Query client ID from admin socket, wait 2 seconds + # and retry 10 times if it is not ready + with safe_while(sleep=2, tries=10) as proceed: + while proceed(): + try: + p = self.client_remote.run(args= + ['sudo', self._prefix + 'ceph', '--admin-daemon', asok_path] + args, + stdout=StringIO(), stderr=StringIO(), wait=False, + timeout=300) + p.wait() + break + except CommandFailedError: + if "connection refused" in p.stderr.getvalue().lower(): + pass + + return json.loads(p.stdout.getvalue().strip()) + + def get_global_id(self): + """ + Look up the CephFS client ID for this mount + """ + return self.admin_socket(['mds_sessions'])['id'] + + def get_global_inst(self): + """ + Look up the CephFS client instance for this mount + """ + return self.inst + + def get_global_addr(self): + """ + Look up the CephFS client addr for this mount + """ + return self.addr + + def get_client_pid(self): + """ + return pid of ceph-fuse process + """ + status = self.admin_socket(['status']) + return status['metadata']['pid'] + + def get_osd_epoch(self): + """ + Return 2-tuple of osd_epoch, osd_epoch_barrier + """ + status = self.admin_socket(['status']) + return status['osd_epoch'], status['osd_epoch_barrier'] + + def get_dentry_count(self): + """ + Return 2-tuple of dentry_count, dentry_pinned_count + """ + status = self.admin_socket(['status']) + return status['dentry_count'], status['dentry_pinned_count'] + + def set_cache_size(self, size): + return self.admin_socket(['config', 'set', 'client_cache_size', str(size)]) + + def get_op_read_count(self): + return self.admin_socket(['perf', 'dump', 'objecter'])['objecter']['osdop_read'] diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py new file mode 100644 index 000000000..89f6b6639 --- /dev/null +++ b/qa/tasks/cephfs/kernel_mount.py @@ -0,0 +1,394 @@ +import errno +import json +import logging +import os +import re + +from io import StringIO +from textwrap import dedent + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology.contextutil import MaxWhileTries + +from tasks.cephfs.mount 
import CephFSMount, UMOUNT_TIMEOUT + +log = logging.getLogger(__name__) + + +# internal metadata directory +DEBUGFS_META_DIR = 'meta' + +class KernelMount(CephFSMount): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, hostfs_mntpt=None, + cephfs_name=None, cephfs_mntpt=None, brxnet=None, + client_config={}): + super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_remote=client_remote, + client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet, + client_config=client_config) + + if client_config.get('debug', False): + self.client_remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + self.client_remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + + self.dynamic_debug = self.client_config.get('dynamic_debug', False) + self.rbytes = self.client_config.get('rbytes', False) + self.snapdirname = client_config.get('snapdirname', '.snap') + self.syntax_style = self.client_config.get('syntax', 'v2') + self.inst = None + self.addr = None + self._mount_bin = ['adjust-ulimits', 'ceph-coverage', self.test_dir +\ + '/archive/coverage', '/bin/mount', '-t', 'ceph'] + + def mount(self, mntopts=None, check_status=True, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + self.setup_netns() + + if not self.cephfs_mntpt: + self.cephfs_mntpt = '/' + if not self.cephfs_name: + self.cephfs_name = 'cephfs' + + self._create_mntpt() + + retval = self._run_mount_cmd(mntopts, check_status) + if retval: + return retval + + self._set_filemode_on_mntpt() + + if self.dynamic_debug: + kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}', 0) + if kmount_count == 0: + self.enable_dynamic_debug() + self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count + 1 + + try: + self.gather_mount_info() + except: + log.warn('failed to fetch mount info - tests depending on mount addr/inst may fail!') + + def gather_mount_info(self): + self.id = self._get_global_id() + self.get_global_inst() + self.get_global_addr() + + def _run_mount_cmd(self, mntopts, check_status): + mount_cmd = self._get_mount_cmd(mntopts) + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + + try: + self.client_remote.run(args=mount_cmd, timeout=300, + stdout=mountcmd_stdout, + stderr=mountcmd_stderr, omit_sudo=False) + except CommandFailedError as e: + log.info('mount command failed') + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + log.info('mount command passed') + + def _make_mount_cmd_old_or_new_style(self): + optd = {} + mnt_stx = '' + + self.validate_subvol_options() + + assert(self.cephfs_mntpt) + if self.syntax_style == 'v1': + mnt_stx = f':{self.cephfs_mntpt}' + if self.client_id: + optd['name'] = self.client_id + if self.cephfs_name: + optd['mds_namespace'] = self.cephfs_name + elif self.syntax_style == 'v2': + mnt_stx = f'{self.client_id}@.{self.cephfs_name}={self.cephfs_mntpt}' + else: + assert 0, f'invalid syntax style: {self.syntax_style}' + return (mnt_stx, optd) + + def _get_mount_cmd(self, mntopts): + opts = 'norequire_active_mds' + if self.client_keyring_path and self.client_id: + opts += ',secret=' + self.get_key_from_keyfile() + if self.config_path: + opts += ',conf=' + self.config_path + if self.rbytes: + opts += ",rbytes" + else: + opts 
+= ",norbytes" + if self.snapdirname != '.snap': + opts += f',snapdirname={self.snapdirname}' + + mount_cmd = ['sudo'] + self._nsenter_args + stx_opt = self._make_mount_cmd_old_or_new_style() + for opt_name, opt_val in stx_opt[1].items(): + opts += f',{opt_name}={opt_val}' + if mntopts: + opts += ',' + ','.join(mntopts) + log.info(f'mounting using device: {stx_opt[0]}') + # do not fall-back to old-style mount (catch new-style + # mount syntax bugs in the kernel). exclude this config + # when using v1-style syntax, since old mount helpers + # (pre-quincy) would pass this option to the kernel. + if self.syntax_style != 'v1': + opts += ",nofallback" + mount_cmd += self._mount_bin + [stx_opt[0], self.hostfs_mntpt, '-v', + '-o', opts] + return mount_cmd + + def umount(self, force=False): + if not self.is_mounted(): + self.cleanup() + return + + if self.is_blocked(): + self._run_umount_lf() + self.cleanup() + return + + log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) + + try: + cmd=['sudo', 'umount', self.hostfs_mntpt] + if force: + cmd.append('-f') + self.client_remote.run(args=cmd, timeout=UMOUNT_TIMEOUT, omit_sudo=False) + except Exception as e: + log.debug('Killing processes on client.{id}...'.format(id=self.client_id)) + self.client_remote.run( + args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', + run.Raw(';'), 'ps', 'auxf'], + timeout=UMOUNT_TIMEOUT, omit_sudo=False) + raise e + + if self.dynamic_debug: + kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}') + assert kmount_count + if kmount_count == 1: + self.disable_dynamic_debug() + self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count - 1 + + self.cleanup() + + def umount_wait(self, force=False, require_clean=False, + timeout=UMOUNT_TIMEOUT): + """ + Unlike the fuse client, the kernel client's umount is immediate + """ + if not self.is_mounted(): + self.cleanup() + return + + try: + self.umount(force) + except (CommandFailedError, MaxWhileTries): + if not force: + raise + + # force delete the netns and umount + self._run_umount_lf() + self.cleanup() + + def wait_until_mounted(self): + """ + Unlike the fuse client, the kernel client is up and running as soon + as the initial mount() function returns. + """ + assert self.is_mounted() + + def teardown(self): + super(KernelMount, self).teardown() + if self.is_mounted(): + self.umount() + + def _get_debug_dir(self): + """ + Get the debugfs folder for this mount + """ + + cluster_name = 'ceph' + fsid = self.ctx.ceph[cluster_name].fsid + + global_id = self._get_global_id() + + return os.path.join("/sys/kernel/debug/ceph/", f"{fsid}.client{global_id}") + + def read_debug_file(self, filename): + """ + Read the debug file "filename", return None if the file doesn't exist. 
+ """ + + path = os.path.join(self._get_debug_dir(), filename) + + stdout = StringIO() + stderr = StringIO() + try: + self.run_shell_payload(f"sudo dd if={path}", timeout=(5 * 60), + stdout=stdout, stderr=stderr) + return stdout.getvalue() + except CommandFailedError: + if 'no such file or directory' in stderr.getvalue().lower(): + return errno.ENOENT + elif 'not a directory' in stderr.getvalue().lower(): + return errno.ENOTDIR + elif 'permission denied' in stderr.getvalue().lower(): + return errno.EACCES + raise + + def _get_global_id(self): + try: + p = self.run_shell_payload("getfattr --only-values -n ceph.client_id .", stdout=StringIO()) + v = p.stdout.getvalue() + prefix = "client" + assert v.startswith(prefix) + return int(v[len(prefix):]) + except CommandFailedError: + # Probably this fallback can be deleted in a few releases when the kernel xattr is widely available. + log.debug("Falling back to messy global_id lookup via /sys...") + + pyscript = dedent(""" + import glob + import os + import json + + def get_id_to_dir(): + result = {} + for dir in glob.glob("/sys/kernel/debug/ceph/*"): + if os.path.basename(dir) == DEBUGFS_META_DIR: + continue + mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() + global_id = mds_sessions_lines[0].split()[1].strip('"') + client_id = mds_sessions_lines[1].split()[1].strip('"') + result[client_id] = global_id + return result + print(json.dumps(get_id_to_dir())) + """) + + output = self.client_remote.sh([ + 'sudo', 'python3', '-c', pyscript + ], timeout=(5*60)) + client_id_to_global_id = json.loads(output) + + try: + return client_id_to_global_id[self.client_id] + except KeyError: + log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format( + self.client_id, ",".join(client_id_to_global_id.keys()) + )) + raise + + def _dynamic_debug_control(self, enable): + """ + Write to dynamic debug control file. + """ + if enable: + fdata = "module ceph +p" + else: + fdata = "module ceph -p" + + self.run_shell_payload(f""" +sudo modprobe ceph +echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control +""") + + def enable_dynamic_debug(self): + """ + Enable the dynamic debug. + """ + self._dynamic_debug_control(True) + + def disable_dynamic_debug(self): + """ + Disable the dynamic debug. + """ + self._dynamic_debug_control(False) + + def get_global_id(self): + """ + Look up the CephFS client ID for this mount, using debugfs. 
+ """ + + assert self.is_mounted() + + return self._get_global_id() + + @property + def _global_addr(self): + if self.addr is not None: + return self.addr + + # The first line of the "status" file's output will be something + # like: + # "instance: client.4297 (0)10.72.47.117:0/1148470933" + # What we need here is only the string "10.72.47.117:0/1148470933" + status = self.read_debug_file("status") + if status is None: + return None + + instance = re.findall(r'instance:.*', status)[0] + self.addr = instance.split()[2].split(')')[1] + return self.addr; + + @property + def _global_inst(self): + if self.inst is not None: + return self.inst + + client_gid = "client%d" % self.get_global_id() + self.inst = " ".join([client_gid, self._global_addr]) + return self.inst + + def get_global_inst(self): + """ + Look up the CephFS client instance for this mount + """ + return self._global_inst + + def get_global_addr(self): + """ + Look up the CephFS client addr for this mount + """ + return self._global_addr + + def get_osd_epoch(self): + """ + Return 2-tuple of osd_epoch, osd_epoch_barrier + """ + osd_map = self.read_debug_file("osdmap") + assert osd_map + + lines = osd_map.split("\n") + first_line_tokens = lines[0].split() + epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3]) + + return epoch, barrier + + def get_op_read_count(self): + stdout = StringIO() + stderr = StringIO() + try: + path = os.path.join(self._get_debug_dir(), "metrics/size") + self.run_shell(f"sudo stat {path}", stdout=stdout, + stderr=stderr, cwd=None) + buf = self.read_debug_file("metrics/size") + except CommandFailedError: + if 'no such file or directory' in stderr.getvalue().lower() \ + or 'not a directory' in stderr.getvalue().lower(): + try: + path = os.path.join(self._get_debug_dir(), "metrics") + self.run_shell(f"sudo stat {path}", stdout=stdout, + stderr=stderr, cwd=None) + buf = self.read_debug_file("metrics") + except CommandFailedError: + return errno.ENOENT + else: + return 0 + return int(re.findall(r'read.*', buf)[0].split()[1]) diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py new file mode 100644 index 000000000..4a8187406 --- /dev/null +++ b/qa/tasks/cephfs/mount.py @@ -0,0 +1,1570 @@ +import hashlib +import json +import logging +import datetime +import os +import re +import time + +from io import StringIO +from contextlib import contextmanager +from textwrap import dedent +from IPy import IP + +from teuthology.contextutil import safe_while +from teuthology.misc import get_file, write_file +from teuthology.orchestra import run +from teuthology.orchestra.run import Raw +from teuthology.exceptions import CommandFailedError, ConnectionLostError + +from tasks.cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + + +UMOUNT_TIMEOUT = 300 + + +class CephFSMount(object): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, hostfs_mntpt=None, + cephfs_name=None, cephfs_mntpt=None, brxnet=None, + client_config=None): + """ + :param test_dir: Global teuthology test dir + :param client_id: Client ID, the 'foo' in client.foo + :param client_keyring_path: path to keyring for given client_id + :param client_remote: Remote instance for the host where client will + run + :param hostfs_mntpt: Path to directory on the FS on which Ceph FS will + be mounted + :param cephfs_name: Name of Ceph FS to be mounted + :param cephfs_mntpt: Path to directory inside Ceph FS that will be + mounted as root + """ + self.ctx = ctx + self.test_dir = test_dir + + 
self._verify_attrs(client_id=client_id, + client_keyring_path=client_keyring_path, + hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name, + cephfs_mntpt=cephfs_mntpt) + + if client_config is None: + client_config = {} + self.client_config = client_config + + self.cephfs_name = cephfs_name + self.client_id = client_id + self.client_keyring_path = client_keyring_path + self.client_remote = client_remote + self.cluster_name = 'ceph' # TODO: use config['cluster'] + self.fs = None + + if cephfs_mntpt is None and client_config.get("mount_path"): + self.cephfs_mntpt = client_config.get("mount_path") + log.info(f"using client_config[\"cephfs_mntpt\"] = {self.cephfs_mntpt}") + else: + self.cephfs_mntpt = cephfs_mntpt + log.info(f"cephfs_mntpt = {self.cephfs_mntpt}") + + if hostfs_mntpt is None and client_config.get("mountpoint"): + self.hostfs_mntpt = client_config.get("mountpoint") + log.info(f"using client_config[\"hostfs_mntpt\"] = {self.hostfs_mntpt}") + elif hostfs_mntpt is not None: + self.hostfs_mntpt = hostfs_mntpt + else: + self.hostfs_mntpt = os.path.join(self.test_dir, f'mnt.{self.client_id}') + self.hostfs_mntpt_dirname = os.path.basename(self.hostfs_mntpt) + log.info(f"hostfs_mntpt = {self.hostfs_mntpt}") + + self._netns_name = None + self.nsid = -1 + if brxnet is None: + self.ceph_brx_net = '192.168.0.0/16' + else: + self.ceph_brx_net = brxnet + + self.test_files = ['a', 'b', 'c'] + + self.background_procs = [] + + # This will cleanup the stale netnses, which are from the + # last failed test cases. + @staticmethod + def cleanup_stale_netnses_and_bridge(remote): + p = remote.run(args=['ip', 'netns', 'list'], + stdout=StringIO(), timeout=(5*60)) + p = p.stdout.getvalue().strip() + + # Get the netns name list + netns_list = re.findall(r'ceph-ns-[^()\s][-.\w]+[^():\s]', p) + + # Remove the stale netnses + for ns in netns_list: + ns_name = ns.split()[0] + args = ['sudo', 'ip', 'netns', 'delete', '{0}'.format(ns_name)] + try: + remote.run(args=args, timeout=(5*60), omit_sudo=False) + except Exception: + pass + + # Remove the stale 'ceph-brx' + try: + args = ['sudo', 'ip', 'link', 'delete', 'ceph-brx'] + remote.run(args=args, timeout=(5*60), omit_sudo=False) + except Exception: + pass + + def _parse_netns_name(self): + self._netns_name = '-'.join(["ceph-ns", + re.sub(r'/+', "-", self.mountpoint)]) + + @property + def mountpoint(self): + if self.hostfs_mntpt is None: + self.hostfs_mntpt = os.path.join(self.test_dir, + self.hostfs_mntpt_dirname) + return self.hostfs_mntpt + + @mountpoint.setter + def mountpoint(self, path): + if not isinstance(path, str): + raise RuntimeError('path should be of str type.') + self._mountpoint = self.hostfs_mntpt = path + + @property + def netns_name(self): + if self._netns_name == None: + self._parse_netns_name() + return self._netns_name + + @netns_name.setter + def netns_name(self, name): + self._netns_name = name + + def assert_that_ceph_fs_exists(self): + output = self.ctx.managers[self.cluster_name].raw_cluster_cmd("fs", "ls") + if self.cephfs_name: + assert self.cephfs_name in output, \ + 'expected ceph fs is not present on the cluster' + log.info(f'Mounting Ceph FS {self.cephfs_name}; just confirmed its presence on cluster') + else: + assert 'No filesystems enabled' not in output, \ + 'ceph cluster has no ceph fs, not even the default ceph fs' + log.info('Mounting default Ceph FS; just confirmed its presence on cluster') + + def assert_and_log_minimum_mount_details(self): + """ + Make sure we have minimum details required for mounting. 
Ideally, this + method should be called at the beginning of the mount method. + """ + if not self.client_id or not self.client_remote or \ + not self.hostfs_mntpt: + log.error(f"self.client_id = {self.client_id}") + log.error(f"self.client_remote = {self.client_remote}") + log.error(f"self.hostfs_mntpt = {self.hostfs_mntpt}") + errmsg = ('Mounting CephFS requires that at least following ' + 'details to be provided -\n' + '1. the client ID,\n2. the mountpoint and\n' + '3. the remote machine where CephFS will be mounted.\n') + raise RuntimeError(errmsg) + + self.assert_that_ceph_fs_exists() + + log.info('Mounting Ceph FS. Following are details of mount; remember ' + '"None" represents Python type None -') + log.info(f'self.client_remote.hostname = {self.client_remote.hostname}') + log.info(f'self.client.name = client.{self.client_id}') + log.info(f'self.hostfs_mntpt = {self.hostfs_mntpt}') + log.info(f'self.cephfs_name = {self.cephfs_name}') + log.info(f'self.cephfs_mntpt = {self.cephfs_mntpt}') + log.info(f'self.client_keyring_path = {self.client_keyring_path}') + if self.client_keyring_path: + log.info('keyring content -\n' + + get_file(self.client_remote, self.client_keyring_path, + sudo=True).decode()) + + def is_blocked(self): + if not self.addr: + # can't infer if our addr is blocklisted - let the caller try to + # umount without lazy/force. If the client was blocklisted, then + # the umount would be stuck and the test would fail on timeout. + # happens only with Ubuntu 20.04 (missing kclient patches :/). + return False + self.fs = Filesystem(self.ctx, name=self.cephfs_name) + + try: + output = self.fs.mon_manager.raw_cluster_cmd(args='osd blocklist ls') + except CommandFailedError: + # Fallback for older Ceph cluster + output = self.fs.mon_manager.raw_cluster_cmd(args='osd blacklist ls') + + return self.addr in output + + def is_stuck(self): + """ + Check if mount is stuck/in a hanged state. + """ + if not self.is_mounted(): + return False + + retval = self.client_remote.run(args=f'sudo stat {self.hostfs_mntpt}', + omit_sudo=False, wait=False).returncode + if retval == 0: + return False + + time.sleep(10) + proc = self.client_remote.run(args='ps -ef', stdout=StringIO()) + # if proc was running even after 10 seconds, it has to be stuck. + if f'stat {self.hostfs_mntpt}' in proc.stdout.getvalue(): + log.critical('client mounted at self.hostfs_mntpt is stuck!') + return True + return False + + def is_mounted(self): + file = self.client_remote.read_file('/proc/self/mounts',stdout=StringIO()) + if self.hostfs_mntpt in file: + return True + else: + log.debug(f"not mounted; /proc/self/mounts is:\n{file}") + return False + + def setupfs(self, name=None): + if name is None and self.fs is not None: + # Previous mount existed, reuse the old name + name = self.fs.name + self.fs = Filesystem(self.ctx, name=name) + log.info('Wait for MDS to reach steady state...') + self.fs.wait_for_daemons() + log.info('Ready to start {}...'.format(type(self).__name__)) + + def _create_mntpt(self): + self.client_remote.run(args=f'mkdir -p -v {self.hostfs_mntpt}', + timeout=60) + # Use 0000 mode to prevent undesired modifications to the mountpoint on + # the local file system. 
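+        # (Note: root can still write through mode 0000, but unprivileged test
+        # helpers cannot; once the Ceph FS is mounted on top, the mounted root
+        # is chmod'ed to 1777 so the rest of the test has unrestricted access.)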
+ self.client_remote.run(args=f'chmod 0000 {self.hostfs_mntpt}', + timeout=60) + + @property + def _nsenter_args(self): + return ['nsenter', f'--net=/var/run/netns/{self.netns_name}'] + + def _set_filemode_on_mntpt(self): + stderr = StringIO() + try: + self.client_remote.run( + args=['sudo', 'chmod', '1777', self.hostfs_mntpt], + stderr=stderr, timeout=(5*60)) + except CommandFailedError: + # the client does not have write permissions in the caps it holds + # for the Ceph FS that was just mounted. + if 'permission denied' in stderr.getvalue().lower(): + pass + + def _setup_brx_and_nat(self): + # The ip for ceph-brx should be + ip = IP(self.ceph_brx_net)[-2] + mask = self.ceph_brx_net.split('/')[1] + brd = IP(self.ceph_brx_net).broadcast() + + brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue()) + if brx: + # If the 'ceph-brx' already exists, then check whether + # the new net is conflicting with it + _ip, _mask = brx[0].split()[1].split('/', 1) + if _ip != "{}".format(ip) or _mask != mask: + raise RuntimeError("Conflict with existing ceph-brx {0}, new {1}/{2}".format(brx[0].split()[1], ip, mask)) + + # Setup the ceph-brx and always use the last valid IP + if not brx: + log.info("Setuping the 'ceph-brx' with {0}/{1}".format(ip, mask)) + + self.run_shell_payload(f""" + set -e + sudo ip link add name ceph-brx type bridge + sudo ip addr flush dev ceph-brx + sudo ip link set ceph-brx up + sudo ip addr add {ip}/{mask} brd {brd} dev ceph-brx + """, timeout=(5*60), omit_sudo=False, cwd='/') + + args = "echo 1 | sudo tee /proc/sys/net/ipv4/ip_forward" + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + # Setup the NAT + p = self.client_remote.run(args=['route'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + p = re.findall(r'default .*', p.stdout.getvalue()) + if p == False: + raise RuntimeError("No default gw found") + gw = p[0].split()[7] + + self.run_shell_payload(f""" + set -e + sudo iptables -A FORWARD -o {gw} -i ceph-brx -j ACCEPT + sudo iptables -A FORWARD -i {gw} -o ceph-brx -j ACCEPT + sudo iptables -t nat -A POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE + """, timeout=(5*60), omit_sudo=False, cwd='/') + + def _setup_netns(self): + p = self.client_remote.run(args=['ip', 'netns', 'list'], + stderr=StringIO(), stdout=StringIO(), + timeout=(5*60)).stdout.getvalue().strip() + + # Get the netns name list + netns_list = re.findall(r'[^()\s][-.\w]+[^():\s]', p) + + out = re.search(r"{0}".format(self.netns_name), p) + if out is None: + # Get an uniq nsid for the new netns + nsid = 0 + p = self.client_remote.run(args=['ip', 'netns', 'list-id'], + stderr=StringIO(), stdout=StringIO(), + timeout=(5*60)).stdout.getvalue() + while True: + out = re.search(r"nsid {} ".format(nsid), p) + if out is None: + break + + nsid += 1 + + # Add one new netns and set it id + self.run_shell_payload(f""" + set -e + sudo ip netns add {self.netns_name} + sudo ip netns set {self.netns_name} {nsid} + """, timeout=(5*60), omit_sudo=False, cwd='/') + self.nsid = nsid; + else: + # The netns already exists and maybe suspended by self.kill() + self.resume_netns(); + + nsid = int(re.search(r"{0} \(id: (\d+)\)".format(self.netns_name), p).group(1)) + self.nsid = nsid; + return + + # Get one ip address for netns + ips = IP(self.ceph_brx_net) + for ip in ips: + found = False + if ip == ips[0]: + continue + if ip == ips[-2]: + raise RuntimeError("we have ran out of the ip addresses") 
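+            # 'ip' is now a candidate address from ceph_brx_net (the network
+            # address and the bridge address at [-2] are never handed out);
+            # the loop below asks every existing netns for its addresses so we
+            # only claim an IP no other mount's veth pair is already using.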
+ + for ns in netns_list: + ns_name = ns.split()[0] + args = ['sudo', 'ip', 'netns', 'exec', '{0}'.format(ns_name), 'ip', 'addr'] + try: + p = self.client_remote.run(args=args, stderr=StringIO(), + stdout=StringIO(), timeout=(5*60), + omit_sudo=False) + q = re.search("{0}".format(ip), p.stdout.getvalue()) + if q is not None: + found = True + break + except CommandFailedError: + if "No such file or directory" in p.stderr.getvalue(): + pass + if "Invalid argument" in p.stderr.getvalue(): + pass + + if found == False: + break + + mask = self.ceph_brx_net.split('/')[1] + brd = IP(self.ceph_brx_net).broadcast() + + log.info("Setuping the netns '{0}' with {1}/{2}".format(self.netns_name, ip, mask)) + + # Setup the veth interfaces + brxip = IP(self.ceph_brx_net)[-2] + self.run_shell_payload(f""" + set -e + sudo ip link add veth0 netns {self.netns_name} type veth peer name brx.{nsid} + sudo ip netns exec {self.netns_name} ip addr add {ip}/{mask} brd {brd} dev veth0 + sudo ip netns exec {self.netns_name} ip link set veth0 up + sudo ip netns exec {self.netns_name} ip link set lo up + sudo ip netns exec {self.netns_name} ip route add default via {brxip} + """, timeout=(5*60), omit_sudo=False, cwd='/') + + # Bring up the brx interface and join it to 'ceph-brx' + self.run_shell_payload(f""" + set -e + sudo ip link set brx.{nsid} up + sudo ip link set dev brx.{nsid} master ceph-brx + """, timeout=(5*60), omit_sudo=False, cwd='/') + + def _cleanup_netns(self): + if self.nsid == -1: + return + log.info("Removing the netns '{0}'".format(self.netns_name)) + + # Delete the netns and the peer veth interface + self.run_shell_payload(f""" + set -e + sudo ip link set brx.{self.nsid} down + sudo ip link delete dev brx.{self.nsid} + sudo ip netns delete {self.netns_name} + """, timeout=(5*60), omit_sudo=False, cwd='/') + + self.nsid = -1 + + def _cleanup_brx_and_nat(self): + brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue()) + if not brx: + return + + # If we are the last netns, will delete the ceph-brx + args = ['sudo', 'ip', 'link', 'show'] + p = self.client_remote.run(args=args, stdout=StringIO(), + timeout=(5*60), omit_sudo=False) + _list = re.findall(r'brx\.', p.stdout.getvalue().strip()) + if len(_list) != 0: + return + + log.info("Removing the 'ceph-brx'") + + self.run_shell_payload(""" + set -e + sudo ip link set ceph-brx down + sudo ip link delete ceph-brx + """, timeout=(5*60), omit_sudo=False, cwd='/') + + # Drop the iptables NAT rules + ip = IP(self.ceph_brx_net)[-2] + mask = self.ceph_brx_net.split('/')[1] + + p = self.client_remote.run(args=['route'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + p = re.findall(r'default .*', p.stdout.getvalue()) + if p == False: + raise RuntimeError("No default gw found") + gw = p[0].split()[7] + self.run_shell_payload(f""" + set -e + sudo iptables -D FORWARD -o {gw} -i ceph-brx -j ACCEPT + sudo iptables -D FORWARD -i {gw} -o ceph-brx -j ACCEPT + sudo iptables -t nat -D POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE + """, timeout=(5*60), omit_sudo=False, cwd='/') + + def setup_netns(self): + """ + Setup the netns for the mountpoint. + """ + log.info("Setting the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + self._setup_brx_and_nat() + self._setup_netns() + + def cleanup_netns(self): + """ + Cleanup the netns for the mountpoint. 
+ """ + # We will defer cleaning the netnses and bridge until the last + # mountpoint is unmounted, this will be a temporary work around + # for issue#46282. + + # log.info("Cleaning the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + # self._cleanup_netns() + # self._cleanup_brx_and_nat() + + def suspend_netns(self): + """ + Suspend the netns veth interface. + """ + if self.nsid == -1: + return + + log.info("Suspending the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + + args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'down'] + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + def resume_netns(self): + """ + Resume the netns veth interface. + """ + if self.nsid == -1: + return + + log.info("Resuming the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + + args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'up'] + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + def mount(self, mntopts=[], check_status=True, **kwargs): + """ + kwargs expects its members to be same as the arguments accepted by + self.update_attrs(). + """ + raise NotImplementedError() + + def mount_wait(self, **kwargs): + """ + Accepts arguments same as self.mount(). + """ + self.mount(**kwargs) + self.wait_until_mounted() + + def _run_umount_lf(self): + log.debug(f'Force/lazy unmounting on client.{self.client_id}') + + try: + proc = self.client_remote.run( + args=f'sudo umount --lazy --force {self.hostfs_mntpt}', + timeout=UMOUNT_TIMEOUT, omit_sudo=False) + except CommandFailedError: + if self.is_mounted(): + raise + + return proc + + def umount(self): + raise NotImplementedError() + + def umount_wait(self, force=False, require_clean=False, + timeout=UMOUNT_TIMEOUT): + """ + + :param force: Expect that the mount will not shutdown cleanly: kill + it hard. + :param require_clean: Wait for the Ceph client associated with the + mount (e.g. ceph-fuse) to terminate, and + raise if it doesn't do so cleanly. + :param timeout: amount of time to be waited for umount command to finish + :return: + """ + raise NotImplementedError() + + def _verify_attrs(self, **kwargs): + """ + Verify that client_id, client_keyring_path, client_remote, hostfs_mntpt, + cephfs_name, cephfs_mntpt are either type str or None. + """ + for k, v in kwargs.items(): + if v is not None and not isinstance(v, str): + raise RuntimeError('value of attributes should be either str ' + f'or None. {k} - {v}') + + def update_attrs(self, client_id=None, client_keyring_path=None, + client_remote=None, hostfs_mntpt=None, cephfs_name=None, + cephfs_mntpt=None): + if not (client_id or client_keyring_path or client_remote or + cephfs_name or cephfs_mntpt or hostfs_mntpt): + return + + self._verify_attrs(client_id=client_id, + client_keyring_path=client_keyring_path, + hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name, + cephfs_mntpt=cephfs_mntpt) + + if client_id: + self.client_id = client_id + if client_keyring_path: + self.client_keyring_path = client_keyring_path + if client_remote: + self.client_remote = client_remote + if hostfs_mntpt: + self.hostfs_mntpt = hostfs_mntpt + if cephfs_name: + self.cephfs_name = cephfs_name + if cephfs_mntpt: + self.cephfs_mntpt = cephfs_mntpt + + def remount(self, **kwargs): + """ + Update mount object's attributes and attempt remount with these + new values for these attrbiutes. + + 1. Run umount_wait(). + 2. Run update_attrs(). + 3. Run mount(). 
+ + Accepts arguments of self.mount() and self.update_attrs() with 1 + exception: wait accepted too which can be True or False. + """ + self.umount_wait() + assert not self.is_mounted() + + mntopts = kwargs.pop('mntopts', []) + check_status = kwargs.pop('check_status', True) + wait = kwargs.pop('wait', True) + + self.update_attrs(**kwargs) + + retval = self.mount(mntopts=mntopts, check_status=check_status) + # avoid this scenario (again): mount command might've failed and + # check_status might have silenced the exception, yet we attempt to + # wait which might lead to an error. + if retval is None and wait: + self.wait_until_mounted() + + return retval + + def kill(self): + """ + Suspend the netns veth interface to make the client disconnected + from the ceph cluster + """ + log.info('Killing connection on {0}...'.format(self.client_remote.name)) + self.suspend_netns() + + def kill_cleanup(self): + """ + Follow up ``kill`` to get to a clean unmounted state. + """ + log.info('Cleaning up killed connection on {0}'.format(self.client_remote.name)) + self.umount_wait(force=True) + + def cleanup(self): + """ + Remove the mount point. + + Prerequisite: the client is not mounted. + """ + log.info('Cleaning up mount {0}'.format(self.client_remote.name)) + stderr = StringIO() + try: + self.client_remote.run(args=['rmdir', '--', self.mountpoint], + cwd=self.test_dir, stderr=stderr, + timeout=(60*5), check_status=False) + except CommandFailedError: + if "no such file or directory" not in stderr.getvalue().lower(): + raise + + self.cleanup_netns() + + def wait_until_mounted(self): + raise NotImplementedError() + + def get_keyring_path(self): + # N.B.: default keyring is /etc/ceph/ceph.keyring; see ceph.py and generate_caps + return '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id) + + def get_key_from_keyfile(self): + # XXX: don't call run_shell(), since CephFS might be unmounted. + keyring = self.client_remote.read_file(self.client_keyring_path).\ + decode() + + for line in keyring.split('\n'): + if line.find('key') != -1: + return line[line.find('=') + 1 : ].strip() + + raise RuntimeError('Key not found in keyring file ' + f'{self.client_keyring_path}. Its contents are -\n' + f'{keyring}') + + @property + def config_path(self): + """ + Path to ceph.conf: override this if you're not a normal systemwide ceph install + :return: stringv + """ + return "/etc/ceph/ceph.conf" + + @contextmanager + def mounted_wait(self): + """ + A context manager, from an initially unmounted state, to mount + this, yield, and then unmount and clean up. 
+ """ + self.mount() + self.wait_until_mounted() + try: + yield + finally: + self.umount_wait() + + def create_file(self, filename='testfile', dirname=None, user=None, + check_status=True): + assert(self.is_mounted()) + + if not os.path.isabs(filename): + if dirname: + if os.path.isabs(dirname): + path = os.path.join(dirname, filename) + else: + path = os.path.join(self.hostfs_mntpt, dirname, filename) + else: + path = os.path.join(self.hostfs_mntpt, filename) + else: + path = filename + + if user: + args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', 'touch ' + path] + else: + args = 'touch ' + path + + return self.client_remote.run(args=args, check_status=check_status) + + def create_files(self): + assert(self.is_mounted()) + + for suffix in self.test_files: + log.info("Creating file {0}".format(suffix)) + self.client_remote.run(args=[ + 'touch', os.path.join(self.hostfs_mntpt, suffix) + ]) + + def test_create_file(self, filename='testfile', dirname=None, user=None, + check_status=True): + return self.create_file(filename=filename, dirname=dirname, user=user, + check_status=False) + + def check_files(self): + assert(self.is_mounted()) + + for suffix in self.test_files: + log.info("Checking file {0}".format(suffix)) + r = self.client_remote.run(args=[ + 'ls', os.path.join(self.hostfs_mntpt, suffix) + ], check_status=False) + if r.exitstatus != 0: + raise RuntimeError("Expected file {0} not found".format(suffix)) + + def write_file(self, path, data, perms=None): + """ + Write the given data at the given path and set the given perms to the + file on the path. + """ + if path.find(self.hostfs_mntpt) == -1: + path = os.path.join(self.hostfs_mntpt, path) + + write_file(self.client_remote, path, data) + + if perms: + self.run_shell(args=f'chmod {perms} {path}') + + def read_file(self, path): + """ + Return the data from the file on given path. 
+ """ + if path.find(self.hostfs_mntpt) == -1: + path = os.path.join(self.hostfs_mntpt, path) + + return self.run_shell(args=['cat', path]).\ + stdout.getvalue().strip() + + def create_destroy(self): + assert(self.is_mounted()) + + filename = "{0} {1}".format(datetime.datetime.now(), self.client_id) + log.debug("Creating test file {0}".format(filename)) + self.client_remote.run(args=[ + 'touch', os.path.join(self.hostfs_mntpt, filename) + ]) + log.debug("Deleting test file {0}".format(filename)) + self.client_remote.run(args=[ + 'rm', '-f', os.path.join(self.hostfs_mntpt, filename) + ]) + + def _run_python(self, pyscript, py_version='python3', sudo=False): + args, omit_sudo = [], True + if sudo: + args.append('sudo') + omit_sudo = False + args += ['adjust-ulimits', 'daemon-helper', 'kill', py_version, '-c', pyscript] + return self.client_remote.run(args=args, wait=False, stdin=run.PIPE, + stdout=StringIO(), omit_sudo=omit_sudo) + + def run_python(self, pyscript, py_version='python3', sudo=False): + p = self._run_python(pyscript, py_version, sudo=sudo) + p.wait() + return p.stdout.getvalue().strip() + + def run_shell(self, args, timeout=300, **kwargs): + omit_sudo = kwargs.pop('omit_sudo', False) + cwd = kwargs.pop('cwd', self.mountpoint) + stdout = kwargs.pop('stdout', StringIO()) + stderr = kwargs.pop('stderr', StringIO()) + + return self.client_remote.run(args=args, cwd=cwd, timeout=timeout, + stdout=stdout, stderr=stderr, + omit_sudo=omit_sudo, **kwargs) + + def run_shell_payload(self, payload, **kwargs): + kwargs['args'] = ["bash", "-c", Raw(f"'{payload}'")] + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + return self.run_shell(**kwargs) + + def run_as_user(self, **kwargs): + """ + Besides the arguments defined for run_shell() this method also + accepts argument 'user'. + """ + args = kwargs.pop('args') + user = kwargs.pop('user') + if isinstance(args, str): + args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', args] + elif isinstance(args, list): + cmdlist = args + cmd = '' + for i in cmdlist: + cmd = cmd + i + ' ' + # get rid of extra space at the end. + cmd = cmd[:-1] + + args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', cmd] + + kwargs['args'] = args + kwargs['omit_sudo'] = False + return self.run_shell(**kwargs) + + def run_as_root(self, **kwargs): + """ + Accepts same arguments as run_shell(). + """ + kwargs['user'] = 'root' + return self.run_as_user(**kwargs) + + def assert_retval(self, proc_retval, exp_retval): + msg = (f'expected return value: {exp_retval}\n' + f'received return value: {proc_retval}\n') + assert proc_retval == exp_retval, msg + + def _verify(self, proc, exp_retval=None, exp_errmsgs=None): + if exp_retval is None and exp_errmsgs is None: + raise RuntimeError('Method didn\'t get enough parameters. Pass ' + 'return value or error message expected from ' + 'the command/process.') + + if exp_retval is not None: + self.assert_retval(proc.returncode, exp_retval) + if exp_errmsgs is None: + return + + if isinstance(exp_errmsgs, str): + exp_errmsgs = (exp_errmsgs, ) + + proc_stderr = proc.stderr.getvalue().lower() + msg = ('didn\'t find any of the expected string in stderr.\n' + f'expected string: {exp_errmsgs}\n' + f'received error message: {proc_stderr}\n' + 'note: received error message is converted to lowercase') + for e in exp_errmsgs: + if e in proc_stderr: + break + # this else is meant for for loop. 
+ else: + assert False, msg + + def negtestcmd(self, args, retval=None, errmsgs=None, stdin=None, + cwd=None, wait=True): + """ + Conduct a negative test for the given command. + + retval and errmsgs are parameters to confirm the cause of command + failure. + + Note: errmsgs is expected to be a tuple, but in case there's only + error message, it can also be a string. This method will handle + that internally. + """ + proc = self.run_shell(args=args, wait=wait, stdin=stdin, cwd=cwd, + check_status=False) + self._verify(proc, retval, errmsgs) + return proc + + def negtestcmd_as_user(self, args, user, retval=None, errmsgs=None, + stdin=None, cwd=None, wait=True): + proc = self.run_as_user(args=args, user=user, wait=wait, stdin=stdin, + cwd=cwd, check_status=False) + self._verify(proc, retval, errmsgs) + return proc + + def negtestcmd_as_root(self, args, retval=None, errmsgs=None, stdin=None, + cwd=None, wait=True): + proc = self.run_as_root(args=args, wait=wait, stdin=stdin, cwd=cwd, + check_status=False) + self._verify(proc, retval, errmsgs) + return proc + + def open_for_reading(self, basename): + """ + Open a file for reading only. + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + return self._run_python(dedent( + """ + import os + mode = os.O_RDONLY + fd = os.open("{path}", mode) + os.close(fd) + """.format(path=path) + )) + + def open_for_writing(self, basename, creat=True, trunc=True, excl=False): + """ + Open a file for writing only. + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + return self._run_python(dedent( + """ + import os + mode = os.O_WRONLY + if {creat}: + mode |= os.O_CREAT + if {trunc}: + mode |= os.O_TRUNC + if {excl}: + mode |= os.O_EXCL + fd = os.open("{path}", mode) + os.close(fd) + """.format(path=path, creat=creat, trunc=trunc, excl=excl) + )) + + def open_no_data(self, basename): + """ + A pure metadata operation + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + p = self._run_python(dedent( + """ + f = open("{path}", 'w') + """.format(path=path) + )) + p.wait() + + def open_background(self, basename="background_file", write=True, content="content"): + """ + Open a file for writing, then block such that the client + will hold a capability. + + Don't return until the remote process has got as far as opening + the file, then return the RemoteProcess instance. + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + if write: + pyscript = dedent(""" + import time + + with open("{path}", 'w') as f: + f.write("{content}") + f.flush() + while True: + time.sleep(1) + """).format(path=path, content=content) + else: + pyscript = dedent(""" + import time + + with open("{path}", 'r') as f: + while True: + time.sleep(1) + """).format(path=path) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + + # This wait would not be sufficient if the file had already + # existed, but it's simple and in practice users of open_background + # are not using it on existing files. + if write: + self.wait_for_visible(basename, size=len(content)) + else: + self.wait_for_visible(basename) + + return rproc + + def open_dir_background(self, basename): + """ + Create and hold a capability to a directory. 
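A hypothetical call to the negative-test helper defined above, asserting that removing a non-existent directory fails; the exit status and message are what coreutils `rmdir` typically reports, not something the qa code guarantees:

    proc = mount_a.negtestcmd(args=['rmdir', 'no-such-dir'],
                              retval=1,
                              errmsgs='no such file or directory')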
+ """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + pyscript = dedent(""" + import time + import os + + os.mkdir("{path}") + fd = os.open("{path}", os.O_RDONLY) + while True: + time.sleep(1) + """).format(path=path) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + + self.wait_for_visible(basename) + + return rproc + + def wait_for_dir_empty(self, dirname, timeout=30): + dirpath = os.path.join(self.hostfs_mntpt, dirname) + with safe_while(sleep=5, tries=(timeout//5)) as proceed: + while proceed(): + p = self.run_shell_payload(f"stat -c %h {dirpath}") + nr_links = int(p.stdout.getvalue().strip()) + if nr_links == 2: + return + + def wait_for_visible(self, basename="background_file", size=None, timeout=30): + i = 0 + args = ['stat'] + if size is not None: + args += ['--printf=%s'] + args += [os.path.join(self.hostfs_mntpt, basename)] + while i < timeout: + p = self.client_remote.run(args=args, stdout=StringIO(), check_status=False) + if p.exitstatus == 0: + if size is not None: + s = p.stdout.getvalue().strip() + if int(s) == size: + log.info(f"File {basename} became visible with size {size} from {self.client_id} after {i}s") + return + else: + log.error(f"File {basename} became visible but with size {int(s)} not {size}") + else: + log.info(f"File {basename} became visible from {self.client_id} after {i}s") + return + time.sleep(1) + i += 1 + + raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format( + i, basename, self.client_id)) + + def lock_background(self, basename="background_file", do_flock=True): + """ + Open and lock a files for writing, hold the lock in a background process + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script_builder = """ + import time + import fcntl + import struct""" + if do_flock: + script_builder += """ + f1 = open("{path}-1", 'w') + fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)""" + script_builder += """ + f2 = open("{path}-2", 'w') + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + while True: + time.sleep(1) + """ + + pyscript = dedent(script_builder).format(path=path) + + log.info("lock_background file {0}".format(basename)) + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def lock_and_release(self, basename="background_file"): + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script = """ + import time + import fcntl + import struct + f1 = open("{path}-1", 'w') + fcntl.flock(f1, fcntl.LOCK_EX) + f2 = open("{path}-2", 'w') + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + """ + pyscript = dedent(script).format(path=path) + + log.info("lock_and_release file {0}".format(basename)) + return self._run_python(pyscript) + + def check_filelock(self, basename="background_file", do_flock=True): + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script_builder = """ + import fcntl + import errno + import struct""" + if do_flock: + script_builder += """ + f1 = open("{path}-1", 'r') + try: + fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("flock on file {path}-1 not found")""" + script_builder += """ + f2 = open("{path}-2", 'r') + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + 
fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("posix lock on file {path}-2 not found") + """ + pyscript = dedent(script_builder).format(path=path) + + log.info("check lock on file {0}".format(basename)) + self.client_remote.run(args=[ + 'python3', '-c', pyscript + ]) + + def write_background(self, basename="background_file", loop=False): + """ + Open a file for writing, complete as soon as you can + :param basename: + :return: + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + pyscript = dedent(""" + import os + import time + + fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0o644) + try: + while True: + os.write(fd, b'content') + time.sleep(1) + if not {loop}: + break + except IOError as e: + pass + os.close(fd) + """).format(path=path, loop=str(loop)) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def write_n_mb(self, filename, n_mb, seek=0, wait=True): + """ + Write the requested number of megabytes to a file + """ + assert(self.is_mounted()) + + return self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename), + "bs=1M", "conv=fdatasync", + "count={0}".format(int(n_mb)), + "seek={0}".format(int(seek)) + ], wait=wait) + + def write_test_pattern(self, filename, size): + log.info("Writing {0} bytes to {1}".format(size, filename)) + return self.run_python(dedent(""" + import zlib + path = "{path}" + with open(path, 'w') as f: + for i in range(0, {size}): + val = zlib.crc32(str(i).encode('utf-8')) & 7 + f.write(chr(val)) + """.format( + path=os.path.join(self.hostfs_mntpt, filename), + size=size + ))) + + def validate_test_pattern(self, filename, size): + log.info("Validating {0} bytes from {1}".format(size, filename)) + # Use sudo because cephfs-data-scan may recreate the file with owner==root + return self.run_python(dedent(""" + import zlib + path = "{path}" + with open(path, 'r') as f: + bytes = f.read() + if len(bytes) != {size}: + raise RuntimeError("Bad length {{0}} vs. expected {{1}}".format( + len(bytes), {size} + )) + for i, b in enumerate(bytes): + val = zlib.crc32(str(i).encode('utf-8')) & 7 + if b != chr(val): + raise RuntimeError("Bad data at offset {{0}}".format(i)) + """.format( + path=os.path.join(self.hostfs_mntpt, filename), + size=size + )), sudo=True) + + def open_n_background(self, fs_path, count): + """ + Open N files for writing, hold them open in a background process + + :param fs_path: Path relative to CephFS root, e.g. "foo/bar" + :return: a RemoteProcess + """ + assert(self.is_mounted()) + + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import sys + import time + import os + + n = {count} + abs_path = "{abs_path}" + + if not os.path.exists(abs_path): + os.makedirs(abs_path) + + handles = [] + for i in range(0, n): + fname = "file_"+str(i) + path = os.path.join(abs_path, fname) + handles.append(open(path, 'w')) + + while True: + time.sleep(1) + """).format(abs_path=abs_path, count=count) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def create_n_files(self, fs_path, count, sync=False, dirsync=False, + unlink=False, finaldirsync=False, hard_links=0): + """ + Create n files. 
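A minimal standalone sketch (Linux only, throwaway temp files) of the two lock flavours exercised by lock_background() and check_filelock() above: a BSD-style flock() on one file and a POSIX fcntl() write lock on the other:

    import fcntl
    import struct
    import tempfile

    with tempfile.NamedTemporaryFile() as f1, tempfile.NamedTemporaryFile() as f2:
        # Whole-file advisory lock, non-blocking.
        fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # 'hhllhh' approximates struct flock (l_type, l_whence, l_start,
        # l_len plus two trailing fields); F_WRLCK with zero start/len
        # write-locks the whole file.
        lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
        fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)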
+ + :param sync: sync the file after writing + :param dirsync: sync the containing directory after closing the file + :param unlink: unlink the file after closing + :param finaldirsync: sync the containing directory after closing the last file + :param hard_links: create given number of hard link(s) for each file + """ + + assert(self.is_mounted()) + + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(f""" + import os + import uuid + + n = {count} + create_hard_links = False + if {hard_links} > 0: + create_hard_links = True + path = "{abs_path}" + + dpath = os.path.dirname(path) + fnameprefix = os.path.basename(path) + os.makedirs(dpath, exist_ok=True) + + try: + dirfd = os.open(dpath, os.O_DIRECTORY) + + for i in range(n): + fpath = os.path.join(dpath, f"{{fnameprefix}}_{{i}}") + with open(fpath, 'w') as f: + f.write(f"{{i}}") + if {sync}: + f.flush() + os.fsync(f.fileno()) + if {unlink}: + os.unlink(fpath) + if {dirsync}: + os.fsync(dirfd) + if create_hard_links: + for j in range({hard_links}): + os.system(f"ln {{fpath}} {{dpath}}/{{fnameprefix}}_{{i}}_{{uuid.uuid4()}}") + if {finaldirsync}: + os.fsync(dirfd) + finally: + os.close(dirfd) + """) + + self.run_python(pyscript) + + def teardown(self): + for p in self.background_procs: + log.info("Terminating background process") + self._kill_background(p) + + self.background_procs = [] + + def _kill_background(self, p): + if p.stdin: + p.stdin.close() + try: + p.wait() + except (CommandFailedError, ConnectionLostError): + pass + + def kill_background(self, p): + """ + For a process that was returned by one of the _background member functions, + kill it hard. + """ + self._kill_background(p) + self.background_procs.remove(p) + + def send_signal(self, signal): + signal = signal.lower() + if signal.lower() not in ['sigstop', 'sigcont', 'sigterm', 'sigkill']: + raise NotImplementedError + + self.client_remote.run(args=['sudo', 'kill', '-{0}'.format(signal), + self.client_pid], omit_sudo=False) + + def get_global_id(self): + raise NotImplementedError() + + def get_global_inst(self): + raise NotImplementedError() + + def get_global_addr(self): + raise NotImplementedError() + + def get_osd_epoch(self): + raise NotImplementedError() + + def get_op_read_count(self): + raise NotImplementedError() + + def readlink(self, fs_path): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import os + + print(os.readlink("{path}")) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return str(proc.stdout.getvalue().strip()) + + + def lstat(self, fs_path, follow_symlinks=False, wait=True): + return self.stat(fs_path, follow_symlinks=False, wait=True) + + def stat(self, fs_path, follow_symlinks=True, wait=True, **kwargs): + """ + stat a file, and return the result as a dictionary like this: + { + "st_ctime": 1414161137.0, + "st_mtime": 1414161137.0, + "st_nlink": 33, + "st_gid": 0, + "st_dev": 16777218, + "st_size": 1190, + "st_ino": 2, + "st_uid": 0, + "st_mode": 16877, + "st_atime": 1431520593.0 + } + + Raises exception on absent file. 
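A hypothetical sequence combining create_n_files() with the stat() helper described above: create ten small files with one extra hard link each and confirm the resulting link count; the path prefix is an example and `mount_a` is a concrete mount from the fixture:

    mount_a.create_n_files('testdir/file', 10, finaldirsync=True, hard_links=1)
    st = mount_a.stat('testdir/file_0')
    assert st['st_nlink'] == 2   # the original name plus one hard link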
+ """ + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + if follow_symlinks: + stat_call = "os.stat('" + abs_path + "')" + else: + stat_call = "os.lstat('" + abs_path + "')" + + pyscript = dedent(""" + import os + import stat + import json + import sys + + try: + s = {stat_call} + except OSError as e: + sys.exit(e.errno) + + attrs = ["st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime"] + print(json.dumps( + dict([(a, getattr(s, a)) for a in attrs]), + indent=2)) + """).format(stat_call=stat_call) + proc = self._run_python(pyscript, **kwargs) + if wait: + proc.wait() + return json.loads(proc.stdout.getvalue().strip()) + else: + return proc + + def touch(self, fs_path): + """ + Create a dentry if it doesn't already exist. This python + implementation exists because the usual command line tool doesn't + pass through error codes like EIO. + + :param fs_path: + :return: + """ + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + pyscript = dedent(""" + import sys + import errno + + try: + f = open("{path}", "w") + f.close() + except IOError as e: + sys.exit(errno.EIO) + """).format(path=abs_path) + proc = self._run_python(pyscript) + proc.wait() + + def path_to_ino(self, fs_path, follow_symlinks=True): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + if follow_symlinks: + pyscript = dedent(""" + import os + import stat + + print(os.stat("{path}").st_ino) + """).format(path=abs_path) + else: + pyscript = dedent(""" + import os + import stat + + print(os.lstat("{path}").st_ino) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return int(proc.stdout.getvalue().strip()) + + def path_to_nlink(self, fs_path): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import os + import stat + + print(os.stat("{path}").st_nlink) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return int(proc.stdout.getvalue().strip()) + + def ls(self, path=None, **kwargs): + """ + Wrap ls: return a list of strings + """ + kwargs['args'] = ["ls"] + if path: + kwargs['args'].append(path) + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + ls_text = self.run_shell(**kwargs).stdout.getvalue().strip() + + if ls_text: + return ls_text.split("\n") + else: + # Special case because otherwise split on empty string + # gives you [''] instead of [] + return [] + + def setfattr(self, path, key, val, **kwargs): + """ + Wrap setfattr. + + :param path: relative to mount point + :param key: xattr name + :param val: xattr value + :return: None + """ + kwargs['args'] = ["setfattr", "-n", key, "-v", val, path] + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + self.run_shell(**kwargs) + + def getfattr(self, path, attr, **kwargs): + """ + Wrap getfattr: return the values of a named xattr on one file, or + None if the attribute is not found. 
+ + :return: a string + """ + kwargs['args'] = ["getfattr", "--only-values", "-n", attr, path] + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + kwargs['wait'] = False + p = self.run_shell(**kwargs) + try: + p.wait() + except CommandFailedError as e: + if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue(): + return None + else: + raise + + return str(p.stdout.getvalue()) + + def df(self): + """ + Wrap df: return a dict of usage fields in bytes + """ + + p = self.run_shell(["df", "-B1", "."]) + lines = p.stdout.getvalue().strip().split("\n") + fs, total, used, avail = lines[1].split()[:4] + log.warning(lines) + + return { + "total": int(total), + "used": int(used), + "available": int(avail) + } + + def dir_checksum(self, path=None, follow_symlinks=False): + cmd = ["find"] + if follow_symlinks: + cmd.append("-L") + if path: + cmd.append(path) + cmd.extend(["-type", "f", "-exec", "md5sum", "{}", "+"]) + checksum_text = self.run_shell(cmd).stdout.getvalue().strip() + checksum_sorted = sorted(checksum_text.split('\n'), key=lambda v: v.split()[1]) + return hashlib.md5(('\n'.join(checksum_sorted)).encode('utf-8')).hexdigest() + + def validate_subvol_options(self): + mount_subvol_num = self.client_config.get('mount_subvol_num', None) + if self.cephfs_mntpt and mount_subvol_num is not None: + log.warning("You cannot specify both: cephfs_mntpt and mount_subvol_num") + log.info(f"Mounting subvol {mount_subvol_num} for now") + + if mount_subvol_num is not None: + # mount_subvol must be an index into the subvol path array for the fs + if not self.cephfs_name: + self.cephfs_name = 'cephfs' + assert(hasattr(self.ctx, "created_subvols")) + # mount_subvol must be specified under client.[0-9] yaml section + subvol_paths = self.ctx.created_subvols[self.cephfs_name] + path_to_mount = subvol_paths[mount_subvol_num] + self.cephfs_mntpt = path_to_mount diff --git a/qa/tasks/cephfs/test_acls.py b/qa/tasks/cephfs/test_acls.py new file mode 100644 index 000000000..48160dd8b --- /dev/null +++ b/qa/tasks/cephfs/test_acls.py @@ -0,0 +1,39 @@ +from logging import getLogger + +from io import StringIO +from tasks.cephfs.xfstests_dev import XFSTestsDev + + +log = getLogger(__name__) + + +class TestACLs(XFSTestsDev): + + def test_acls(self): + from tasks.cephfs.fuse_mount import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + + if isinstance(self.mount_a, FuseMount): + log.info('client is fuse mounted') + elif isinstance(self.mount_a, KernelMount): + log.info('client is kernel mounted') + + # XXX: check_status is set to False so that we can check for command's + # failure on our own (since this command doesn't set right error code + # and error message in some cases) and print custom log messages + # accordingly. 
+ proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0', + './check', 'generic/099'], cwd=self.xfstests_repo_path, stdout=StringIO(), + stderr=StringIO(), timeout=30, check_status=False,omit_sudo=False, + label='running tests for ACLs from xfstests-dev') + + if proc.returncode != 0: + log.info('Command failed.') + log.info(f'Command return value: {proc.returncode}') + stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue() + log.info(f'Command stdout -\n{stdout}') + log.info(f'Command stderr -\n{stderr}') + + self.assertEqual(proc.returncode, 0) + success_line = 'Passed all 1 tests' + self.assertIn(success_line, stdout) diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py new file mode 100644 index 000000000..9890381c6 --- /dev/null +++ b/qa/tasks/cephfs/test_admin.py @@ -0,0 +1,1494 @@ +import errno +import json +import logging +import time +import uuid +from io import StringIO +from os.path import join as os_path_join + +from teuthology.exceptions import CommandFailedError + +from tasks.cephfs.cephfs_test_case import CephFSTestCase, classhook +from tasks.cephfs.filesystem import FileLayout, FSMissing +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.caps_helper import CapTester + +log = logging.getLogger(__name__) + +class TestAdminCommands(CephFSTestCase): + """ + Tests for administration command. + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def check_pool_application_metadata_key_value(self, pool, app, key, value): + output = self.fs.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'application', 'get', pool, app, key) + self.assertEqual(str(output.strip()), value) + + def setup_ec_pools(self, n, metadata=True, overwrites=True): + if metadata: + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-meta", "8") + cmd = ['osd', 'erasure-code-profile', 'set', n+"-profile", "m=2", "k=2", "crush-failure-domain=osd"] + self.fs.mon_manager.raw_cluster_cmd(*cmd) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile") + if overwrites: + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true') + +@classhook('_add_valid_tell') +class TestValidTell(TestAdminCommands): + @classmethod + def _add_valid_tell(cls): + tells = [ + ['cache', 'status'], + ['damage', 'ls'], + ['dump_blocked_ops'], + ['dump_blocked_ops_count'], + ['dump_historic_ops'], + ['dump_historic_ops_by_duration'], + ['dump_mempools'], + ['dump_ops_in_flight'], + ['flush', 'journal'], + ['get', 'subtrees'], + ['ops', 'locks'], + ['ops'], + ['status'], + ['version'], + ] + def test(c): + def f(self): + J = self.fs.rank_tell(c) + json.dumps(J) + log.debug("dumped:\n%s", str(J)) + return f + for c in tells: + setattr(cls, 'test_valid_' + '_'.join(c), test(c)) + +class TestFsStatus(TestAdminCommands): + """ + Test "ceph fs status subcommand. + """ + + def test_fs_status(self): + """ + That `ceph fs status` command functions. + """ + + s = self.fs.mon_manager.raw_cluster_cmd("fs", "status") + self.assertTrue("active" in s) + + mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json-pretty"))["mdsmap"] + self.assertEqual(mdsmap[0]["state"], "active") + + mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json"))["mdsmap"] + self.assertEqual(mdsmap[0]["state"], "active") + + +class TestAddDataPool(TestAdminCommands): + """ + Test "ceph fs add_data_pool" subcommand. 
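A minimal standalone sketch of the test-name generation performed by the _add_valid_tell() class hook above, where each tell command list becomes one generated test method:

    tells = [['cache', 'status'], ['damage', 'ls'], ['flush', 'journal']]
    for c in tells:
        print('test_valid_' + '_'.join(c))
    # -> test_valid_cache_status, test_valid_damage_ls, test_valid_flush_journal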
+ """ + + def test_add_data_pool_root(self): + """ + That a new data pool can be added and used for the root directory. + """ + + p = self.fs.add_data_pool("foo") + self.fs.set_dir_layout(self.mount_a, ".", FileLayout(pool=p)) + + def test_add_data_pool_application_metadata(self): + """ + That the application metadata set on a newly added data pool is as expected. + """ + pool_name = "foo" + mon_cmd = self.fs.mon_manager.raw_cluster_cmd + mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min', + str(self.fs.pg_num_min)) + # Check whether https://tracker.ceph.com/issues/43061 is fixed + mon_cmd('osd', 'pool', 'application', 'enable', pool_name, 'cephfs') + self.fs.add_data_pool(pool_name, create=False) + self.check_pool_application_metadata_key_value( + pool_name, 'cephfs', 'data', self.fs.name) + + def test_add_data_pool_subdir(self): + """ + That a new data pool can be added and used for a sub-directory. + """ + + p = self.fs.add_data_pool("foo") + self.mount_a.run_shell("mkdir subdir") + self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p)) + + def test_add_data_pool_non_alphamueric_name_as_subdir(self): + """ + That a new data pool with non-alphanumeric name can be added and used for a sub-directory. + """ + p = self.fs.add_data_pool("I-am-data_pool00.") + self.mount_a.run_shell("mkdir subdir") + self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p)) + + def test_add_data_pool_ec(self): + """ + That a new EC data pool can be added. + """ + + n = "test_add_data_pool_ec" + self.setup_ec_pools(n, metadata=False) + self.fs.add_data_pool(n+"-data", create=False) + + def test_add_already_in_use_data_pool(self): + """ + That command try to add data pool which is already in use with another fs. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + # try to add 'first_data_pool' with 'second_fs' + # Expecting EINVAL exit status because 'first_data_pool' is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', second_fs, first_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use as data pool for first_fs") + + def test_add_already_in_use_metadata_pool(self): + """ + That command try to add metadata pool which is already in use with another fs. 
+ """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + # try to add 'second_metadata_pool' with 'first_fs' as a data pool + # Expecting EINVAL exit status because 'second_metadata_pool' + # is already in use with 'second_fs' as a metadata pool + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', first_fs, second_metadata_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use as metadata pool for 'second_fs'") + +class TestFsNew(TestAdminCommands): + """ + Test "ceph fs new" subcommand. + """ + MDSS_REQUIRED = 3 + + def test_fsnames_can_only_by_goodchars(self): + n = 'test_fsnames_can_only_by_goodchars' + metapoolname, datapoolname = n+'-testmetapool', n+'-testdatapool' + badname = n+'badname@#' + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + n+metapoolname) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + n+datapoolname) + + # test that fsname not with "goodchars" fails + args = ['fs', 'new', badname, metapoolname, datapoolname] + proc = self.fs.mon_manager.run_cluster_cmd(args=args,stderr=StringIO(), + check_status=False) + self.assertIn('invalid chars', proc.stderr.getvalue().lower()) + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', metapoolname, + metapoolname, + '--yes-i-really-really-mean-it-not-faking') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', datapoolname, + datapoolname, + '--yes-i-really-really-mean-it-not-faking') + + def test_new_default_ec(self): + """ + That a new file system warns/fails with an EC default data pool. + """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec" + self.setup_ec_pools(n) + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + def test_new_default_ec_force(self): + """ + That a new file system succeeds with an EC default data pool with --force. + """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec_force" + self.setup_ec_pools(n) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + + def test_new_default_ec_no_overwrite(self): + """ + That a new file system fails with an EC default data pool without overwrite. 
+ """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec_no_overwrite" + self.setup_ec_pools(n, overwrites=False) + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + # and even with --force ! + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + def test_fs_new_pool_application_metadata(self): + """ + That the application metadata set on the pools of a newly created filesystem are as expected. + """ + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + fs_name = "test_fs_new_pool_application" + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + mon_cmd = self.fs.mon_manager.raw_cluster_cmd + for p in pool_names: + mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min)) + mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs') + mon_cmd('fs', 'new', fs_name, pool_names[0], pool_names[1]) + for i in range(2): + self.check_pool_application_metadata_key_value( + pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id(self): + """ + That a file system can be created with a specific ID. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + for i in range(2): + self.check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id_idempotency(self): + """ + That command to create file system with specific ID is idempotent. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + + def test_fs_new_with_specific_id_fails_without_force_flag(self): + """ + That command to create file system with specific ID fails without '--force' flag. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID without --force flag") + else: + self.fail("expected creating file system with specific ID without '--force' flag to fail") + + def test_fs_new_with_specific_id_fails_already_in_use(self): + """ + That creating file system with ID already in use fails. 
+ """ + fs_name = "test_fs_specific_id" + # file system ID already in use + fscid = self.fs.status().map['filesystems'][0]['id'] + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID that is already in use") + else: + self.fail("expected creating file system with ID already in use to fail") + + def test_fs_new_metadata_pool_already_in_use(self): + """ + That creating file system with metadata pool already in use. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'first_metadata_pool' + # data pool -> 'second_data_pool' + # Expecting EINVAL exit status because 'first_metadata_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, second_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata pool is already in use for 'first_fs'") + + def test_fs_new_data_pool_already_in_use(self): + """ + That creating file system with data pool already in use. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'second_metadata_pool' + # data pool -> 'first_data_pool' + # Expecting EINVAL exit status because 'first_data_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, first_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use for 'first_fs'") + + def test_fs_new_metadata_and_data_pool_in_use_by_another_same_fs(self): + """ + That creating file system with metadata and data pool which is already in use by another same fs. 
+ """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'first_metadata_pool' + # data pool -> 'first_data_pool' + # Expecting EINVAL exit status because 'first_metadata_pool' and 'first_data_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, first_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs'") + + def test_fs_new_metadata_and_data_pool_in_use_by_different_fs(self): + """ + That creating file system with metadata and data pool which is already in use by different fs. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + third_fs = "third_fs" + + # try to create new fs 'third_fs' with following configuration + # metadata pool -> 'first_metadata_pool' + # data pool -> 'second_data_pool' + # Expecting EINVAL exit status because 'first_metadata_pool' and 'second_data_pool' + # is already in use with 'first_fs' and 'second_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_metadata_pool, second_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs' and 'second_fs'") + + def test_fs_new_interchange_already_in_use_metadata_and_data_pool_of_same_fs(self): + """ + That creating file system with interchanging metadata and data pool which is already in use by same fs. 
+ """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'first_data_pool' (already used as data pool for 'first_fs') + # data pool -> 'first_metadata_pool' (already used as metadata pool for 'first_fs') + # Expecting EINVAL exit status because 'first_data_pool' and 'first_metadata_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_data_pool, first_metadata_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs'") + + def test_fs_new_interchange_already_in_use_metadata_and_data_pool_of_different_fs(self): + """ + That creating file system with interchanging metadata and data pool which is already in use by defferent fs. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + third_fs = "third_fs" + + # try to create new fs 'third_fs' with following configuration + # metadata pool -> 'first_data_pool' (already used as data pool for 'first_fs') + # data pool -> 'second_metadata_pool' (already used as metadata pool for 'second_fs') + # Expecting EINVAL exit status because 'first_data_pool' and 'second_metadata_pool' + # is already in use with 'first_fs' and 'second_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_data_pool, second_metadata_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs' and 'second_fs'") + + def test_fs_new_metadata_pool_already_in_use_with_rbd(self): + """ + That creating new file system with metadata pool already used by rbd. 
+ """ + + # create pool and initialise with rbd + new_pool = "new_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) + + new_fs = "new_fs" + new_data_pool = "new_data_pool" + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_data_pool) + + # try to create new fs 'new_fs' with following configuration + # metadata pool -> 'new_pool' (already used by rbd app) + # data pool -> 'new_data_pool' + # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_pool, new_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata pool is already in use for rbd") + + def test_fs_new_data_pool_already_in_use_with_rbd(self): + """ + That creating new file system with data pool already used by rbd. + """ + + # create pool and initialise with rbd + new_pool = "new_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) + + new_fs = "new_fs" + new_metadata_pool = "new_metadata_pool" + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_metadata_pool) + + # try to create new fs 'new_fs' with following configuration + # metadata pool -> 'new_metadata_pool' + # data pool -> 'new_pool' (already used by rbd app) + # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_metadata_pool, new_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use for rbd") + +class TestRenameCommand(TestAdminCommands): + """ + Tests for rename command. + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_fs_rename(self): + """ + That the file system can be renamed, and the application metadata set on its pools are as expected. + """ + # Renaming the file system breaks this mount as the client uses + # file system specific authorization. The client cannot read + # or write even if the client's cephx ID caps are updated to access + # the new file system name without the client being unmounted and + # re-mounted. + self.mount_a.umount_wait(require_clean=True) + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + client_id = 'test_new_cephfs' + + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + + # authorize a cephx ID access to the renamed file system. + # use the ID to write to the file system. 
+ self.fs.name = new_fs_name + keyring = self.fs.authorize(client_id, ('/', 'rw')) + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + self.mount_a.remount(client_id=client_id, + client_keyring_path=keyring_path, + cephfs_mntpt='/', + cephfs_name=self.fs.name) + filedata, filename = 'some data on fs', 'file_on_fs' + filepath = os_path_join(self.mount_a.hostfs_mntpt, filename) + self.mount_a.write_file(filepath, filedata) + self.check_pool_application_metadata_key_value( + self.fs.get_data_pool_name(), 'cephfs', 'data', new_fs_name) + self.check_pool_application_metadata_key_value( + self.fs.get_metadata_pool_name(), 'cephfs', 'metadata', new_fs_name) + + # cleanup + self.mount_a.umount_wait() + self.run_cluster_cmd(f'auth rm client.{client_id}') + + def test_fs_rename_idempotency(self): + """ + That the file system rename operation is idempotent. + """ + # Renaming the file system breaks this mount as the client uses + # file system specific authorization. + self.mount_a.umount_wait(require_clean=True) + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + + # original file system name does not appear in `fs ls` command + self.assertFalse(self.fs.exists()) + self.fs.name = new_fs_name + self.assertTrue(self.fs.exists()) + + def test_fs_rename_fs_new_fails_with_old_fsname_existing_pools(self): + """ + That after renaming a file system, creating a file system with + old name and existing FS pools fails. + """ + # Renaming the file system breaks this mount as the client uses + # file system specific authorization. + self.mount_a.umount_wait(require_clean=True) + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + data_pool = self.fs.get_data_pool_name() + metadata_pool = self.fs.get_metadata_pool_name() + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + + try: + self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool}") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a new file system with old " + "name and existing pools.") + else: + self.fail("expected creating new file system with old name and " + "existing pools to fail.") + + try: + self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} --force") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a new file system with old " + "name, existing pools and --force flag.") + else: + self.fail("expected creating new file system with old name, " + "existing pools, and --force flag to fail.") + + try: + self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} " + "--allow-dangerous-metadata-overlay") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a new file system with old name, " + "existing pools and --allow-dangerous-metadata-overlay flag.") + else: + self.fail("expected creating new file system with old name, " + "existing pools, and --allow-dangerous-metadata-overlay flag to fail.") + + def test_fs_rename_fails_without_yes_i_really_mean_it_flag(self): + """ + That renaming a file system without '--yes-i-really-mean-it' flag fails. 
+ """ + try: + self.run_cluster_cmd(f"fs rename {self.fs.name} new_fs") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "invalid error code on renaming a file system without the " + "'--yes-i-really-mean-it' flag") + else: + self.fail("expected renaming of file system without the " + "'--yes-i-really-mean-it' flag to fail ") + + def test_fs_rename_fails_for_non_existent_fs(self): + """ + That renaming a non-existent file system fails. + """ + try: + self.run_cluster_cmd("fs rename non_existent_fs new_fs --yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on renaming a non-existent fs") + else: + self.fail("expected renaming of a non-existent file system to fail") + + def test_fs_rename_fails_new_name_already_in_use(self): + """ + That renaming a file system fails if the new name refers to an existing file system. + """ + self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) + + try: + self.run_cluster_cmd(f"fs rename {self.fs.name} {self.fs2.name} --yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on renaming to a fs name that is already in use") + else: + self.fail("expected renaming to a new file system name that is already in use to fail.") + + def test_fs_rename_fails_with_mirroring_enabled(self): + """ + That renaming a file system fails if mirroring is enabled on it. + """ + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + + self.run_cluster_cmd(f'fs mirror enable {orig_fs_name}') + try: + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, "invalid error code on renaming a mirrored file system") + else: + self.fail("expected renaming of a mirrored file system to fail") + self.run_cluster_cmd(f'fs mirror disable {orig_fs_name}') + + +class TestDump(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_fs_dump_epoch(self): + """ + That dumping a specific epoch works. + """ + + status1 = self.fs.status() + status2 = self.fs.status(epoch=status1["epoch"]-1) + self.assertEqual(status1["epoch"], status2["epoch"]+1) + + def test_fsmap_trim(self): + """ + That the fsmap is trimmed normally. + """ + + paxos_service_trim_min = 25 + self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min) + mon_max_mdsmap_epochs = 20 + self.config_set('mon', 'mon_max_mdsmap_epochs', mon_max_mdsmap_epochs) + + status = self.fs.status() + epoch = status["epoch"] + + # for N mutations + mutations = paxos_service_trim_min + mon_max_mdsmap_epochs + b = False + for i in range(mutations): + self.fs.set_joinable(b) + b = not b + + time.sleep(10) # for tick/compaction + + try: + self.fs.status(epoch=epoch) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed") + else: + self.fail("trimming did not occur as expected") + + def test_fsmap_force_trim(self): + """ + That the fsmap is trimmed forcefully. 
+ """ + + status = self.fs.status() + epoch = status["epoch"] + + paxos_service_trim_min = 1 + self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min) + mon_mds_force_trim_to = epoch+1 + self.config_set('mon', 'mon_mds_force_trim_to', mon_mds_force_trim_to) + + # force a new fsmap + self.fs.set_joinable(False) + time.sleep(10) # for tick/compaction + + status = self.fs.status() + log.debug(f"new epoch is {status['epoch']}") + self.fs.status(epoch=epoch+1) # epoch+1 is not trimmed, may not == status["epoch"] + + try: + self.fs.status(epoch=epoch) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed") + else: + self.fail("trimming did not occur as expected") + + +class TestRequiredClientFeatures(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_required_client_features(self): + """ + That `ceph fs required_client_features` command functions. + """ + + def is_required(index): + out = self.fs.mon_manager.raw_cluster_cmd('fs', 'get', self.fs.name, '--format=json-pretty') + features = json.loads(out)['mdsmap']['required_client_features'] + if "feature_{0}".format(index) in features: + return True; + return False; + + features = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'feature', 'ls', '--format=json-pretty')) + self.assertGreater(len(features), 0); + + for f in features: + self.fs.required_client_features('rm', str(f['index'])) + + for f in features: + index = f['index'] + feature = f['name'] + if feature == 'reserved': + feature = str(index) + + if index % 3 == 0: + continue; + self.fs.required_client_features('add', feature) + self.assertTrue(is_required(index)) + + if index % 2 == 0: + continue; + self.fs.required_client_features('rm', feature) + self.assertFalse(is_required(index)) + + def test_required_client_feature_add_reserved(self): + """ + That `ceph fs required_client_features X add reserved` fails. + """ + + p = self.fs.required_client_features('add', 'reserved', check_status=False, stderr=StringIO()) + self.assertIn('Invalid feature name', p.stderr.getvalue()) + + def test_required_client_feature_rm_reserved(self): + """ + That `ceph fs required_client_features X rm reserved` fails. + """ + + p = self.fs.required_client_features('rm', 'reserved', check_status=False, stderr=StringIO()) + self.assertIn('Invalid feature name', p.stderr.getvalue()) + + def test_required_client_feature_add_reserved_bit(self): + """ + That `ceph fs required_client_features X add <reserved_bit>` passes. + """ + + p = self.fs.required_client_features('add', '1', stderr=StringIO()) + self.assertIn("added feature 'reserved' to required_client_features", p.stderr.getvalue()) + + def test_required_client_feature_rm_reserved_bit(self): + """ + That `ceph fs required_client_features X rm <reserved_bit>` passes. + """ + + self.fs.required_client_features('add', '1') + p = self.fs.required_client_features('rm', '1', stderr=StringIO()) + self.assertIn("removed feature 'reserved' from required_client_features", p.stderr.getvalue()) + +class TestCompatCommands(CephFSTestCase): + """ + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 3 + + def test_add_compat(self): + """ + Test adding a compat. + """ + + self.fs.fail() + self.fs.add_compat(63, 'placeholder') + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['compat']) + + def test_add_incompat(self): + """ + Test adding an incompat. 
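Note: a rough standalone equivalent of the JSON plumbing that is_required() above performs, written against the plain ceph CLI. The helper names are illustrative only.

    import json
    import subprocess

    def ceph_json(*args):
        # hypothetical thin wrapper around the `ceph` CLI
        out = subprocess.check_output(('ceph',) + args + ('--format=json',))
        return json.loads(out)

    def feature_is_required(fs_name, index):
        # `ceph fs get <fs>` exposes the MDSMap; required client features are
        # keyed as "feature_<index>", exactly as is_required() checks above
        mdsmap = ceph_json('fs', 'get', fs_name)['mdsmap']
        return 'feature_{}'.format(index) in mdsmap['required_client_features']

    def list_client_features():
        # returns a list of {'index': ..., 'name': ...} entries,
        # mirroring what the test iterates over
        return ceph_json('fs', 'feature', 'ls')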
+ """ + + self.fs.fail() + self.fs.add_incompat(63, 'placeholder') + mdsmap = self.fs.get_mds_map() + log.info(f"{mdsmap}") + self.assertIn("feature_63", mdsmap['compat']['incompat']) + + def test_rm_compat(self): + """ + Test removing a compat. + """ + + self.fs.fail() + self.fs.add_compat(63, 'placeholder') + self.fs.rm_compat(63) + mdsmap = self.fs.get_mds_map() + self.assertNotIn("feature_63", mdsmap['compat']['compat']) + + def test_rm_incompat(self): + """ + Test removing an incompat. + """ + + self.fs.fail() + self.fs.add_incompat(63, 'placeholder') + self.fs.rm_incompat(63) + mdsmap = self.fs.get_mds_map() + self.assertNotIn("feature_63", mdsmap['compat']['incompat']) + + def test_standby_compat(self): + """ + That adding a compat does not prevent standbys from joining. + """ + + self.fs.fail() + self.fs.add_compat(63, "placeholder") + self.fs.set_joinable() + self.fs.wait_for_daemons() + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['compat']) + + def test_standby_incompat_reject(self): + """ + That adding an incompat feature prevents incompatible daemons from joining. + """ + + self.fs.fail() + self.fs.add_incompat(63, "placeholder") + self.fs.set_joinable() + try: + self.fs.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + + def test_standby_incompat_upgrade(self): + """ + That an MDS can upgrade the compat of a fs. + """ + + self.fs.fail() + self.fs.rm_incompat(1) + self.fs.set_joinable() + self.fs.wait_for_daemons() + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_1", mdsmap['compat']['incompat']) + + def test_standby_replay_not_upgradeable(self): + """ + That the mons will not upgrade the MDSMap compat if standby-replay is + enabled. + """ + + self.fs.fail() + self.fs.rm_incompat(1) + self.fs.set_allow_standby_replay(True) + self.fs.set_joinable() + try: + self.fs.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + + def test_standby_incompat_reject_multifs(self): + """ + Like test_standby_incompat_reject but with a second fs. + """ + + fs2 = self.mds_cluster.newfs(name="cephfs2", create=True) + fs2.fail() + fs2.add_incompat(63, 'placeholder') + fs2.set_joinable() + try: + fs2.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + # did self.fs lose MDS or standbys suicide? + self.fs.wait_for_daemons() + mdsmap = fs2.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['incompat']) + +class TestConfigCommands(CephFSTestCase): + """ + Test that daemons and clients respond to the otherwise rarely-used + runtime config modification operations. + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def test_ceph_config_show(self): + """ + That I can successfully show MDS configuration. 
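Note: the standby tests below hinge on the usual compat-set rule: features recorded as 'compat' may be unknown to a daemon and are simply ignored, while any 'incompat' feature the daemon does not implement prevents it from using the map. A toy model of that rule (not Ceph's actual CompatSet code, feature numbers mirror the tests):

    def daemon_can_join(map_incompat_features, daemon_supported_features):
        # only "incompat" features decide whether a daemon may use the map;
        # unknown "compat" features are harmless
        return map_incompat_features <= daemon_supported_features

    supported = {1, 2, 3}                            # what the daemon implements
    assert daemon_can_join({1}, supported)           # cf. test_standby_compat
    assert not daemon_can_join({1, 63}, supported)   # cf. test_standby_incompat_reject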
+ """ + + names = self.fs.get_rank_names() + for n in names: + s = self.fs.mon_manager.raw_cluster_cmd("config", "show", "mds."+n) + self.assertTrue("NAME" in s) + self.assertTrue("mon_host" in s) + + + def test_client_config(self): + """ + That I can successfully issue asok "config set" commands + + :return: + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Test only applies to FUSE clients") + + test_key = "client_cache_size" + test_val = "123" + self.mount_a.admin_socket(['config', 'set', test_key, test_val]) + out = self.mount_a.admin_socket(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + + def test_mds_config_asok(self): + test_key = "mds_max_purge_ops" + test_val = "123" + self.fs.mds_asok(['config', 'set', test_key, test_val]) + out = self.fs.mds_asok(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + def test_mds_dump_cache_asok(self): + cache_file = "cache_file" + timeout = "1" + self.fs.rank_asok(['dump', 'cache', cache_file, timeout]) + + def test_mds_config_tell(self): + test_key = "mds_max_purge_ops" + test_val = "123" + + self.fs.rank_tell(['injectargs', "--{0}={1}".format(test_key, test_val)]) + + # Read it back with asok because there is no `tell` equivalent + out = self.fs.rank_tell(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + +class TestMirroringCommands(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def _enable_mirroring(self, fs_name): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", fs_name) + + def _disable_mirroring(self, fs_name): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", fs_name) + + def _add_peer(self, fs_name, peer_spec, remote_fs_name): + peer_uuid = str(uuid.uuid4()) + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name) + + def _remove_peer(self, fs_name, peer_uuid): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid) + + def _verify_mirroring(self, fs_name, flag_str): + status = self.fs.status() + fs_map = status.get_fsmap_byname(fs_name) + if flag_str == 'enabled': + self.assertTrue('mirror_info' in fs_map) + elif flag_str == 'disabled': + self.assertTrue('mirror_info' not in fs_map) + else: + raise RuntimeError(f'invalid flag_str {flag_str}') + + def _get_peer_uuid(self, fs_name, peer_spec): + status = self.fs.status() + fs_map = status.get_fsmap_byname(fs_name) + mirror_info = fs_map.get('mirror_info', None) + self.assertTrue(mirror_info is not None) + for peer_uuid, remote in mirror_info['peers'].items(): + client_name = remote['remote']['client_name'] + cluster_name = remote['remote']['cluster_name'] + spec = f'{client_name}@{cluster_name}' + if spec == peer_spec: + return peer_uuid + return None + + def test_mirroring_command(self): + """basic mirroring command test -- enable, disable mirroring on a + filesystem""" + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_peer_commands(self): + """test adding and removing peers to a mirror enabled filesystem""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._add_peer(self.fs.name, "client.site-c@site-c", "fs_c") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + uuid_peer_c 
= self._get_peer_uuid(self.fs.name, "client.site-c@site-c") + self.assertTrue(uuid_peer_b is not None) + self.assertTrue(uuid_peer_c is not None) + self._remove_peer(self.fs.name, uuid_peer_b) + self._remove_peer(self.fs.name, uuid_peer_c) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_command_idempotency(self): + """test to check idempotency of mirroring family of commands """ + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + self._enable_mirroring(self.fs.name) + # add peer + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + uuid_peer_b1 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b1 is not None) + # adding the peer again should be idempotent + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + uuid_peer_b2 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b2 is not None) + self.assertTrue(uuid_peer_b1 == uuid_peer_b2) + # remove peer + self._remove_peer(self.fs.name, uuid_peer_b1) + uuid_peer_b3 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b3 is None) + # removing the peer again should be idempotent + self._remove_peer(self.fs.name, uuid_peer_b1) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + self._disable_mirroring(self.fs.name) + + def test_mirroring_disable_with_peers(self): + """test disabling mirroring for a filesystem with active peers""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is not None) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + # enable mirroring to check old peers + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + # peer should be gone + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is None) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_with_filesystem_reset(self): + """test to verify mirroring state post filesystem reset""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is not None) + # reset filesystem + self.fs.fail() + self.fs.reset() + self.fs.wait_for_daemons() + self._verify_mirroring(self.fs.name, "disabled") + + +class TestFsAuthorize(CephFSTestCase): + client_id = 'testuser' + client_name = 'client.' 
+ client_id + + def test_single_path_r(self): + PERM = 'r' + FS_AUTH_CAPS = (('/', PERM),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + + self.captester.run_mon_cap_tests(self.fs, self.client_id) + self.captester.run_mds_cap_tests(PERM) + + def test_single_path_rw(self): + PERM = 'rw' + FS_AUTH_CAPS = (('/', PERM),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + + self.captester.run_mon_cap_tests(self.fs, self.client_id) + self.captester.run_mds_cap_tests(PERM) + + def test_single_path_rootsquash(self): + PERM = 'rw' + FS_AUTH_CAPS = (('/', PERM, 'root_squash'),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + + # testing MDS caps... + # Since root_squash is set in client caps, client can read but not + # write even thought access level is set to "rw". + self.captester.conduct_pos_test_for_read_caps() + self.captester.conduct_neg_test_for_write_caps(sudo_write=True) + + def test_single_path_authorize_on_nonalphanumeric_fsname(self): + """ + That fs authorize command works on filesystems with names having [_.-] + characters + """ + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + fs_name = "cephfs-_." + self.fs = self.mds_cluster.newfs(name=fs_name) + self.fs.wait_for_daemons() + self.run_cluster_cmd(f'auth caps client.{self.mount_a.client_id} ' + f'mon "allow r" ' + f'osd "allow rw pool={self.fs.get_data_pool_name()}" ' + f'mds allow') + self.mount_a.remount(cephfs_name=self.fs.name) + PERM = 'rw' + FS_AUTH_CAPS = (('/', PERM),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + self.captester.run_mds_cap_tests(PERM) + + def test_multiple_path_r(self): + PERM = 'r' + FS_AUTH_CAPS = (('/dir1/dir12', PERM), ('/dir2/dir22', PERM)) + for c in FS_AUTH_CAPS: + self.mount_a.run_shell(f'mkdir -p .{c[0]}') + self.captesters = (CapTester(), CapTester()) + self.setup_test_env(FS_AUTH_CAPS) + + self.run_cap_test_one_by_one(FS_AUTH_CAPS) + + def test_multiple_path_rw(self): + PERM = 'rw' + FS_AUTH_CAPS = (('/dir1/dir12', PERM), ('/dir2/dir22', PERM)) + for c in FS_AUTH_CAPS: + self.mount_a.run_shell(f'mkdir -p .{c[0]}') + self.captesters = (CapTester(), CapTester()) + self.setup_test_env(FS_AUTH_CAPS) + + self.run_cap_test_one_by_one(FS_AUTH_CAPS) + + def run_cap_test_one_by_one(self, fs_auth_caps): + keyring = self.run_cluster_cmd(f'auth get {self.client_name}') + for i, c in enumerate(fs_auth_caps): + self.assertIn(i, (0, 1)) + PATH = c[0] + PERM = c[1] + self._remount(keyring, PATH) + # actual tests... 
+ self.captesters[i].run_mon_cap_tests(self.fs, self.client_id) + self.captesters[i].run_mds_cap_tests(PERM, PATH) + + def tearDown(self): + self.mount_a.umount_wait() + self.run_cluster_cmd(f'auth rm {self.client_name}') + + super(type(self), self).tearDown() + + def _remount(self, keyring, path='/'): + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt=path) + + def setup_for_single_path(self, fs_auth_caps): + self.captester.write_test_files((self.mount_a,), '/') + keyring = self.fs.authorize(self.client_id, fs_auth_caps) + self._remount(keyring) + + def setup_for_multiple_paths(self, fs_auth_caps): + for i, c in enumerate(fs_auth_caps): + PATH = c[0] + self.captesters[i].write_test_files((self.mount_a,), PATH) + + self.fs.authorize(self.client_id, fs_auth_caps) + + def setup_test_env(self, fs_auth_caps): + if len(fs_auth_caps) == 1: + self.setup_for_single_path(fs_auth_caps[0]) + else: + self.setup_for_multiple_paths(fs_auth_caps) + + +class TestAdminCommandIdempotency(CephFSTestCase): + """ + Tests for administration command idempotency. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_rm_idempotency(self): + """ + That a removing a fs twice is idempotent. + """ + + data_pools = self.fs.get_data_pool_names(refresh=True) + self.fs.fail() + self.fs.rm() + try: + self.fs.get_mds_map() + except FSMissing: + pass + else: + self.fail("get_mds_map should raise") + p = self.fs.rm() + self.assertIn("does not exist", p.stderr.getvalue()) + self.fs.remove_pools(data_pools) + + +class TestAdminCommandDumpTree(CephFSTestCase): + """ + Tests for administration command subtrees. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_dump_subtrees(self): + """ + Dump all the subtrees to make sure the MDS daemon won't crash. + """ + + subtrees = self.fs.mds_asok(['get', 'subtrees']) + log.info(f"dumping {len(subtrees)} subtrees:") + for subtree in subtrees: + log.info(f" subtree: '{subtree['dir']['path']}'") + self.fs.mds_asok(['dump', 'tree', subtree['dir']['path']]) + + log.info("dumping 2 special subtrees:") + log.info(" subtree: '/'") + self.fs.mds_asok(['dump', 'tree', '/']) + log.info(" subtree: '~mdsdir'") + self.fs.mds_asok(['dump', 'tree', '~mdsdir']) + +class TestAdminCommandDumpLoads(CephFSTestCase): + """ + Tests for administration command dump loads. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_dump_loads(self): + """ + make sure depth limit param is considered when dump loads for a MDS daemon. + """ + + log.info("dumping loads") + loads = self.fs.mds_asok(['dump', 'loads', '1']) + self.assertIsNotNone(loads) + self.assertIn("dirfrags", loads) + for d in loads["dirfrags"]: + self.assertLessEqual(d["path"].count("/"), 1) + +class TestFsBalRankMask(CephFSTestCase): + """ + Tests ceph fs set <fs_name> bal_rank_mask + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 2 + + def test_bal_rank_mask(self): + """ + check whether a specified bal_rank_mask value is valid or not. 
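Note: a rough approximation of what the checks in the test body that follows treat as a valid bal_rank_mask: the special values '-1' and 'all', or a hex mask (with or without a 0x prefix) no wider than MAX_MDS/4 = 64 hex digits; the empty string and over-long masks are rejected with EINVAL. This sketch is illustrative and is not the monitor's actual parser.

    MAX_MDS = 256

    def bal_rank_mask_looks_valid(mask):
        if mask in ('-1', 'all'):
            return True                       # shorthand for "all active ranks"
        digits = mask[2:] if mask.startswith('0x') else mask
        if not digits:
            return False                      # '' is rejected (EINVAL)
        try:
            int(digits, 16)
        except ValueError:
            return False
        return len(digits) <= MAX_MDS // 4    # wider masks are rejected (EINVAL)

    for good in ('0x0', '0', '-1', 'all', '0x1', 'f0', 'ab', '0xfff0', '0x' + 'f' * 64):
        assert bal_rank_mask_looks_valid(good)
    for bad in ('', '0x1' + 'f' * 64):
        assert not bal_rank_mask_looks_valid(bad)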
+ """ + bal_rank_mask = '0x0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '-1' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = 'all' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '0x1' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '1' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = 'f0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = 'ab' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '0xfff0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + MAX_MDS = 256 + bal_rank_mask = '0x' + 'f' * int(MAX_MDS / 4) + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '' + log.info("set bal_rank_mask to empty string") + try: + self.fs.set_bal_rank_mask(bal_rank_mask) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + + bal_rank_mask = '0x1' + 'f' * int(MAX_MDS / 4) + log.info(f"set bal_rank_mask {bal_rank_mask}") + try: + self.fs.set_bal_rank_mask(bal_rank_mask) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py new file mode 100644 index 000000000..e6f0a8f0b --- /dev/null +++ b/qa/tasks/cephfs/test_auto_repair.py @@ -0,0 +1,88 @@ + +""" +Exercise the MDS's auto repair functions +""" + +import logging +import time + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestMDSAutoRepair(CephFSTestCase): + def test_backtrace_repair(self): + """ + MDS should verify/fix backtrace on fetch dirfrag + """ + + self.mount_a.run_shell(["mkdir", "testdir1"]) + self.mount_a.run_shell(["touch", "testdir1/testfile"]) + dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino("testdir1")) + + # drop inodes caps + self.mount_a.umount_wait() + + # flush journal entries to dirfrag objects, and expire journal + self.fs.mds_asok(['flush', 'journal']) + + # Restart the MDS to drop the metadata cache (because we expired the journal, + # nothing gets replayed into cache on restart) + self.fs.rank_fail() + 
self.fs.wait_for_daemons()
+
+        # remove testdir1's backtrace
+        self.fs.radosm(["rmxattr", dir_objname, "parent"])
+
+        # readdir (fetch dirfrag) should fix testdir1's backtrace
+        self.mount_a.mount_wait()
+        self.mount_a.run_shell(["ls", "testdir1"])
+
+        # flush journal entries to dirfrag objects
+        self.fs.mds_asok(['flush', 'journal'])
+
+        # check if backtrace exists
+        self.fs.radosm(["getxattr", dir_objname, "parent"])
+
+    def test_mds_readonly(self):
+        """
+        Test that the MDS behaves correctly when it is read-only.
+        """
+        # operations should succeed while the MDS is not read-only
+        self.mount_a.run_shell(["touch", "test_file1"])
+        writer = self.mount_a.write_background(loop=True)
+
+        time.sleep(10)
+        self.assertFalse(writer.finished)
+
+        # force MDS to read-only mode
+        self.fs.mds_asok(['force_readonly'])
+        time.sleep(10)
+
+        # touching the test file should now fail
+        try:
+            self.mount_a.run_shell(["touch", "test_file1"])
+        except CommandFailedError:
+            pass
+        else:
+            self.assertTrue(False)
+
+        # the background writer should also fail
+        self.assertTrue(writer.finished)
+
+        # The MDS should report its read-only health state to the mon
+        self.wait_for_health("MDS_READ_ONLY", timeout=30)
+
+        # restart the MDS to make it writable again
+        self.fs.mds_fail_restart()
+        self.fs.wait_for_daemons()
+
+        self.wait_for_health_clear(timeout=30)
diff --git a/qa/tasks/cephfs/test_backtrace.py b/qa/tasks/cephfs/test_backtrace.py
new file mode 100644
index 000000000..6b094569b
--- /dev/null
+++ b/qa/tasks/cephfs/test_backtrace.py
@@ -0,0 +1,102 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.filesystem import ObjectNotFound
+
+class TestBacktrace(CephFSTestCase):
+    def test_backtrace(self):
+        """
+        That the 'parent', 'layout' and 'symlink' xattrs on the head objects of files
+        are updated correctly.
+ """ + + old_data_pool_name = self.fs.get_data_pool_name() + old_pool_id = self.fs.get_data_pool_id() + + # Not enabling symlink recovery option should not store symlink xattr + self.config_set('mds', 'mds_symlink_recovery', 'false') + self.mount_a.run_shell(["mkdir", "sym_dir0"]) + self.mount_a.run_shell(["touch", "sym_dir0/file1"]) + self.mount_a.run_shell(["ln", "-s", "sym_dir0/file1", "sym_dir0/symlink_file1"]) + file_ino = self.mount_a.path_to_ino("sym_dir0/symlink_file1", follow_symlinks=False) + + self.fs.mds_asok(["flush", "journal"]) + with self.assertRaises(ObjectNotFound): + self.fs.read_symlink(file_ino) + + # Enabling symlink recovery option should store symlink xattr for symlinks + self.config_set('mds', 'mds_symlink_recovery', 'true') + self.mount_a.run_shell(["mkdir", "sym_dir"]) + self.mount_a.run_shell(["touch", "sym_dir/file1"]) + self.mount_a.run_shell(["ln", "-s", "./file1", "sym_dir/symlink_file1"]) + file_ino = self.mount_a.path_to_ino("sym_dir/symlink_file1", follow_symlinks=False) + + self.fs.mds_asok(["flush", "journal"]) + symlink = self.fs.read_symlink(file_ino) + self.assertEqual(symlink, { + "s" : "./file1", + }) + + # Create a file for subsequent checks + self.mount_a.run_shell(["mkdir", "parent_a"]) + self.mount_a.run_shell(["touch", "parent_a/alpha"]) + file_ino = self.mount_a.path_to_ino("parent_a/alpha") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']]) + layout = self.fs.read_layout(file_ino) + self.assertDictEqual(layout, { + "stripe_unit": 4194304, + "stripe_count": 1, + "object_size": 4194304, + "pool_id": old_pool_id, + "pool_ns": "", + }) + self.assertEqual(backtrace['pool'], old_pool_id) + + # That backtrace is written after parentage changes + self.mount_a.run_shell(["mkdir", "parent_b"]) + self.mount_a.run_shell(["mv", "parent_a/alpha", "parent_b/alpha"]) + + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace['ancestors']]) + + # Create a new data pool + new_pool_name = "data_new" + new_pool_id = self.fs.add_data_pool(new_pool_name) + + # That an object which has switched pools gets its backtrace updated + self.mount_a.setfattr("./parent_b/alpha", + "ceph.file.layout.pool", new_pool_name) + self.fs.mds_asok(["flush", "journal"]) + backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) + self.assertEqual(backtrace_old_pool['pool'], new_pool_id) + backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) + self.assertEqual(backtrace_new_pool['pool'], new_pool_id) + new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) + self.assertEqual(new_pool_layout['pool_id'], new_pool_id) + self.assertEqual(new_pool_layout['pool_ns'], '') + + # That subsequent linkage changes are only written to new pool backtrace + self.mount_a.run_shell(["mkdir", "parent_c"]) + self.mount_a.run_shell(["mv", "parent_b/alpha", "parent_c/alpha"]) + self.fs.mds_asok(["flush", "journal"]) + backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) + self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace_old_pool['ancestors']]) + backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) + self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']]) 
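Note: read_backtrace() and read_layout() used above ultimately inspect xattrs on the file's head object in a data pool. A sketch of the equivalent manual inspection is shown below, assuming the usual rados getxattr plus ceph-dencoder pattern; it is illustrative and not lifted from the test framework.

    import subprocess
    import tempfile

    def head_object_name(ino):
        # the first (head) object of a file is named "<inode in hex>.00000000"
        return '{:x}.00000000'.format(ino)

    def dump_backtrace(pool, ino):
        # the backtrace lives in the "parent" xattr of the head object
        raw = subprocess.check_output(
            ['rados', '-p', pool, 'getxattr', head_object_name(ino), 'parent'])
        with tempfile.NamedTemporaryFile() as f:
            f.write(raw)
            f.flush()
            # decode the inode_backtrace_t blob into JSON for inspection
            out = subprocess.check_output(
                ['ceph-dencoder', 'type', 'inode_backtrace_t',
                 'import', f.name, 'decode', 'dump_json'])
        return out.decode()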
+ + # That layout is written to new pool after change to other field in layout + self.mount_a.setfattr("./parent_c/alpha", + "ceph.file.layout.object_size", "8388608") + + self.fs.mds_asok(["flush", "journal"]) + new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) + self.assertEqual(new_pool_layout['object_size'], 8388608) + + # ...but not to the old pool: the old pool's backtrace points to the new pool, and that's enough, + # we don't update the layout in all the old pools whenever it changes + old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name) + self.assertEqual(old_pool_layout['object_size'], 4194304) diff --git a/qa/tasks/cephfs/test_cap_flush.py b/qa/tasks/cephfs/test_cap_flush.py new file mode 100644 index 000000000..70fdc3893 --- /dev/null +++ b/qa/tasks/cephfs/test_cap_flush.py @@ -0,0 +1,58 @@ + +import os +import time +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +class TestCapFlush(CephFSTestCase): + @for_teuthology + def test_replay_create(self): + """ + MDS starts to handle client caps when it enters clientreplay stage. + When handling a client cap in clientreplay stage, it's possible that + corresponding inode does not exist because the client request which + creates inode hasn't been replayed. + """ + + dir_path = os.path.join(self.mount_a.mountpoint, "testdir") + py_script = dedent(""" + import os + os.mkdir("{0}") + fd = os.open("{0}", os.O_RDONLY) + os.fchmod(fd, 0o777) + os.fsync(fd) + """).format(dir_path) + self.mount_a.run_python(py_script) + + self.fs.mds_asok(["flush", "journal"]) + + # client will only get unsafe replay + self.fs.mds_asok(["config", "set", "mds_log_pause", "1"]) + + file_name = "testfile" + file_path = dir_path + "/" + file_name + + # Create a file and modify its mode. ceph-fuse will mark Ax cap dirty + py_script = dedent(""" + import os + os.chdir("{0}") + os.setgid(65534) + os.setuid(65534) + fd = os.open("{1}", os.O_CREAT | os.O_RDWR, 0o644) + os.fchmod(fd, 0o640) + """).format(dir_path, file_name) + self.mount_a.run_python(py_script, sudo=True) + + # Modify file mode by different user. ceph-fuse will send a setattr request + self.mount_a.run_shell(["sudo", "chmod", "600", file_path], wait=False, omit_sudo=False) + + time.sleep(10) + + # Restart mds. Client will re-send the unsafe request and cap flush + self.fs.rank_fail() + self.fs.wait_for_daemons() + + mode = self.mount_a.run_shell(['stat', '-c' '%a', file_path]).stdout.getvalue().strip() + # If the cap flush get dropped, mode should be 0644. + # (Ax cap stays in dirty state, which prevents setattr reply from updating file mode) + self.assertEqual(mode, "600") diff --git a/qa/tasks/cephfs/test_cephfs_shell.py b/qa/tasks/cephfs/test_cephfs_shell.py new file mode 100644 index 000000000..9f7434762 --- /dev/null +++ b/qa/tasks/cephfs/test_cephfs_shell.py @@ -0,0 +1,1167 @@ +""" +NOTE: For running this tests locally (using vstart_runner.py), export the +path to src/tools/cephfs/shell/cephfs-shell module to $PATH. Running +"export PATH=$PATH:$(cd ../src/tools/cephfs/shell && pwd)" from the build dir +will update the environment without hassles of typing the path correctly. 
+""" +from io import StringIO +from os import path +import crypt +import logging +from tempfile import mkstemp as tempfile_mkstemp +import math +from time import sleep +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from textwrap import dedent + +log = logging.getLogger(__name__) + + +def humansize(nbytes): + suffixes = ['B', 'K', 'M', 'G', 'T', 'P'] + i = 0 + while nbytes >= 1024 and i < len(suffixes) - 1: + nbytes /= 1024. + i += 1 + nbytes = math.ceil(nbytes) + f = ('%d' % nbytes).rstrip('.') + return '%s%s' % (f, suffixes[i]) + + +def ensure_str(s): + if isinstance(s, str): + return s + if isinstance(s, bytes): + return s.decode() + raise TypeError("not expecting type '%s'" % type(s)) + + +class TestCephFSShell(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + def setUp(self): + super(TestCephFSShell, self).setUp() + + conf_contents = "[cephfs-shell]\ncolors = False\ndebug = True\n" + confpath = self.mount_a.client_remote.sh('mktemp').strip() + self.mount_a.client_remote.write_file(confpath, conf_contents) + self.default_shell_conf_path = confpath + + def run_cephfs_shell_cmd(self, cmd, mount_x=None, shell_conf_path=None, + opts=None, stdout=None, stderr=None, stdin=None, + check_status=True): + stdout = stdout or StringIO() + stderr = stderr or StringIO() + if mount_x is None: + mount_x = self.mount_a + if isinstance(cmd, list): + cmd = " ".join(cmd) + if not shell_conf_path: + shell_conf_path = self.default_shell_conf_path + + args = ["cephfs-shell", "-c", shell_conf_path] + if opts: + args += opts + args.extend(("--", cmd)) + + log.info("Running command: {}".format(" ".join(args))) + return mount_x.client_remote.run(args=args, stdout=stdout, + stderr=stderr, stdin=stdin, + check_status=check_status) + + def negtest_cephfs_shell_cmd(self, **kwargs): + """ + This method verifies that cephfs shell command fails with expected + return value and/or error message. + + kwargs is expected to hold the arguments same as + run_cephfs_shell_cmd() with the following exceptions - + * It should not contain check_status (since commands are expected + to fail, check_status is hardcoded to False). + * It is optional to set expected error message and return value to + dict members 'errmsg' and 'retval' respectively. 
+
+        This method serves as shorthand for code blocks like -
+
+        try:
+            proc = self.run_cephfs_shell_cmd(args=['some', 'cmd'],
+                                             check_status=False,
+                                             stdout=stdout)
+        except CommandFailedError as e:
+            self.assertNotIn('some error message',
+                             proc.stderr.getvalue().lower())
+
+
+        try:
+            proc = self.run_cephfs_shell_cmd(args=['some', 'cmd'],
+                                             check_status=False,
+                                             stdout=stdout)
+        except CommandFailedError as e:
+            self.assertNotEqual(1, proc.returncode)
+        """
+        retval = kwargs.pop('retval', None)
+        errmsg = kwargs.pop('errmsg', None)
+        kwargs['check_status'] = False
+
+        proc = self.run_cephfs_shell_cmd(**kwargs)
+        if retval:
+            self.assertEqual(proc.returncode, retval)
+        else:
+            self.assertNotEqual(proc.returncode, 0)
+        if errmsg:
+            self.assertIn(errmsg, proc.stderr.getvalue().lower())
+
+        return proc
+
+    def get_cephfs_shell_cmd_output(self, cmd, mount_x=None,
+                                    shell_conf_path=None, opts=None,
+                                    stdout=None, stdin=None,
+                                    check_status=True):
+        return ensure_str(self.run_cephfs_shell_cmd(
+            cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path,
+            opts=opts, stdout=stdout, stdin=stdin,
+            check_status=check_status).stdout.getvalue().strip())
+
+    def get_cephfs_shell_cmd_error(self, cmd, mount_x=None,
+                                   shell_conf_path=None, opts=None,
+                                   stderr=None, stdin=None, check_status=True):
+        return ensure_str(self.run_cephfs_shell_cmd(
+            cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path,
+            opts=opts, stderr=stderr, stdin=stdin,
+            check_status=check_status).stderr.getvalue().strip())
+
+    def run_cephfs_shell_script(self, script, mount_x=None,
+                                shell_conf_path=None, opts=None, stdout=None,
+                                stderr=None, stdin=None, check_status=True):
+        stdout = stdout or StringIO()
+        stderr = stderr or StringIO()
+        if mount_x is None:
+            mount_x = self.mount_a
+
+        scriptpath = tempfile_mkstemp(prefix='test-cephfs', text=True)[1]
+        with open(scriptpath, 'w') as scriptfile:
+            scriptfile.write(script)
+        # copy script to the machine running cephfs-shell.
+ mount_x.client_remote.put_file(scriptpath, scriptpath) + mount_x.run_shell_payload(f"chmod 755 {scriptpath}") + + args = ["cephfs-shell", '-b', scriptpath] + if shell_conf_path: + args[1:1] = ["-c", shell_conf_path] + log.info('Running script \"' + scriptpath + '\"') + return mount_x.client_remote.run(args=args, stdout=stdout, + stderr=stderr, stdin=stdin, + check_status=True) + + def get_cephfs_shell_script_output(self, script, mount_x=None, + shell_conf_path=None, opts=None, + stdout=None, stdin=None, + check_status=True): + return ensure_str(self.run_cephfs_shell_script( + script=script, mount_x=mount_x, shell_conf_path=shell_conf_path, + opts=opts, stdout=stdout, stdin=stdin, + check_status=check_status).stdout.getvalue().strip()) + + +class TestGeneric(TestCephFSShell): + + def test_mistyped_cmd(self): + with self.assertRaises(CommandFailedError) as cm: + self.run_cephfs_shell_cmd('lsx') + self.assertEqual(cm.exception.exitstatus, 127) + + +class TestMkdir(TestCephFSShell): + def test_mkdir(self): + """ + Test that mkdir creates directory + """ + o = self.get_cephfs_shell_cmd_output("mkdir d1") + log.info("cephfs-shell output:\n{}".format(o)) + + o = self.mount_a.stat('d1') + log.info("mount_a output:\n{}".format(o)) + + def test_mkdir_with_070000_octal_mode(self): + """ + Test that mkdir fails with octal mode greater than 07777 + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m 070000 d2") + try: + self.mount_a.stat('d2') + except CommandFailedError: + pass + + def test_mkdir_with_negative_octal_mode(self): + """ + Test that mkdir fails with negative octal mode + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m -0755 d3") + try: + self.mount_a.stat('d3') + except CommandFailedError: + pass + + def test_mkdir_with_non_octal_mode(self): + """ + Test that mkdir passes with non-octal mode + """ + o = self.get_cephfs_shell_cmd_output("mkdir -m u=rwx d4") + log.info("cephfs-shell output:\n{}".format(o)) + + # mkdir d4 should pass + o = self.mount_a.stat('d4') + assert ((o['st_mode'] & 0o700) == 0o700) + + def test_mkdir_with_bad_non_octal_mode(self): + """ + Test that mkdir failes with bad non-octal mode + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m ugx=0755 d5") + try: + self.mount_a.stat('d5') + except CommandFailedError: + pass + + def test_mkdir_path_without_path_option(self): + """ + Test that mkdir fails without path option for creating path + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir d5/d6/d7") + try: + self.mount_a.stat('d5/d6/d7') + except CommandFailedError: + pass + + def test_mkdir_path_with_path_option(self): + """ + Test that mkdir passes with path option for creating path + """ + o = self.get_cephfs_shell_cmd_output("mkdir -p d5/d6/d7") + log.info("cephfs-shell output:\n{}".format(o)) + + # mkdir d5/d6/d7 should pass + o = self.mount_a.stat('d5/d6/d7') + log.info("mount_a output:\n{}".format(o)) + + +class TestRmdir(TestCephFSShell): + dir_name = "test_dir" + + def dir_does_not_exists(self): + """ + Tests that directory does not exists + """ + try: + self.mount_a.stat(self.dir_name) + except CommandFailedError as e: + if e.exitstatus == 2: + return 0 + raise + + def test_rmdir(self): + """ + Test that rmdir deletes directory + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + self.run_cephfs_shell_cmd("rmdir " + self.dir_name) + self.dir_does_not_exists() + + def test_rmdir_non_existing_dir(self): + """ + Test that rmdir does not delete a non existing directory + """ + self.negtest_cephfs_shell_cmd(cmd="rmdir test_dir") + self.dir_does_not_exists() + 
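Note: a rough sketch (not cephfs-shell's actual parser) of the mode handling the mkdir tests above expect: plain octal strings must parse in base 8 and fall within 0..07777, a small symbolic form such as u=rwx is accepted, and malformed modes such as ugx=0755 or negative octal strings are rejected. parse_mkdir_mode is a hypothetical name.

    import re

    def parse_mkdir_mode(mode):
        if re.fullmatch(r'[0-7]+', mode):
            value = int(mode, 8)
            if not 0 <= value <= 0o7777:
                raise ValueError('octal mode out of range: ' + mode)
            return value
        # very small symbolic subset, just for illustration
        m = re.fullmatch(r'(?P<who>[ugoa]+)=(?P<perm>[rwx]+)', mode)
        if not m:
            raise ValueError('bad mode: ' + mode)
        bits = sum({'r': 4, 'w': 2, 'x': 1}[c] for c in set(m.group('perm')))
        value = 0
        for who, shift in (('u', 6), ('g', 3), ('o', 0)):
            if who in m.group('who') or 'a' in m.group('who'):
                value |= bits << shift
        return value

    assert parse_mkdir_mode('0755') == 0o755
    assert parse_mkdir_mode('u=rwx') & 0o700 == 0o700   # cf. test_mkdir_with_non_octal_mode
    for bad in ('070000', '-0755', 'ugx=0755'):
        try:
            parse_mkdir_mode(bad)
        except ValueError:
            pass
        else:
            raise AssertionError(bad)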
+ def test_rmdir_dir_with_file(self): + """ + Test that rmdir does not delete directory containing file + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + + self.run_cephfs_shell_cmd("put - test_dir/dumpfile", stdin="Valid File") + # see comment below + # with self.assertRaises(CommandFailedError) as cm: + with self.assertRaises(CommandFailedError): + self.run_cephfs_shell_cmd("rmdir " + self.dir_name) + # TODO: we need to check for exit code and error message as well. + # skipping it for not since error codes used by cephfs-shell are not + # standard and they may change soon. + # self.assertEqual(cm.exception.exitcode, 39) + self.mount_a.stat(self.dir_name) + + def test_rmdir_existing_file(self): + """ + Test that rmdir does not delete a file + """ + self.run_cephfs_shell_cmd("put - dumpfile", stdin="Valid File") + self.negtest_cephfs_shell_cmd(cmd="rmdir dumpfile") + self.mount_a.stat("dumpfile") + + def test_rmdir_p(self): + """ + Test that rmdir -p deletes all empty directories in the root + directory passed + """ + self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3") + self.run_cephfs_shell_cmd("rmdir -p " + self.dir_name) + self.dir_does_not_exists() + + def test_rmdir_p_valid_path(self): + """ + Test that rmdir -p deletes all empty directories in the path passed + """ + self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3") + self.run_cephfs_shell_cmd("rmdir -p test_dir/t1/t2/t3") + self.dir_does_not_exists() + + def test_rmdir_p_non_existing_dir(self): + """ + Test that rmdir -p does not delete an invalid directory + """ + self.negtest_cephfs_shell_cmd(cmd="rmdir -p test_dir") + self.dir_does_not_exists() + + def test_rmdir_p_dir_with_file(self): + """ + Test that rmdir -p does not delete the directory containing a file + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + self.run_cephfs_shell_cmd("put - test_dir/dumpfile", + stdin="Valid File") + self.run_cephfs_shell_cmd("rmdir -p " + self.dir_name) + self.mount_a.stat(self.dir_name) + + +class TestLn(TestCephFSShell): + dir1 = 'test_dir1' + dir2 = 'test_dir2' + dump_id = 11 + s = 'somedata' + dump_file = 'dump11' + + def test_soft_link_without_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_script(script=dedent(f''' + cd /{self.dir1}/{self.dir2} + ln -s ../{self.dump_file}''')) + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_soft_link_with_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_cmd(f'ln -s /{self.dir1}/{self.dump_file} ' + f'/{self.dir1}/{self.dir2}/') + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_hard_link_without_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_script(script=dedent(f''' + cd /{self.dir1}/{self.dir2} + ln ../{self.dump_file}''')) + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_hard_link_with_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + 
self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_cmd(f'ln /{self.dir1}/{self.dump_file} ' + f'/{self.dir1}/{self.dir2}/') + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_hard_link_to_dir_not_allowed(self): + self.run_cephfs_shell_cmd(f'mkdir {self.dir1}') + self.run_cephfs_shell_cmd(f'mkdir {self.dir2}') + r = self.run_cephfs_shell_cmd(f'ln /{self.dir1} /{self.dir2}/', + check_status=False) + self.assertEqual(r.returncode, 3) + + def test_target_exists_in_dir(self): + self.mount_a.write_file(path=f'{self.dump_file}', data=self.s) + r = self.run_cephfs_shell_cmd(f'ln {self.dump_file} {self.dump_file}', + check_status=False) + self.assertEqual(r.returncode, 1) + + def test_incorrect_dir(self): + self.mount_a.write_file(path=f'{self.dump_file}', data=self.s) + r = self.run_cephfs_shell_cmd(f'ln {self.dump_file} /dir1/', + check_status=False) + self.assertEqual(r.returncode, 5) + + +class TestGetAndPut(TestCephFSShell): + def test_get_with_target_name(self): + """ + Test that get passes with target name + """ + s = 'C' * 1024 + s_hash = crypt.crypt(s, '.A') + o = self.get_cephfs_shell_cmd_output("put - dump4", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + + # put - dump4 should pass + o = self.mount_a.stat('dump4') + log.info("mount_a output:\n{}".format(o)) + + o = self.get_cephfs_shell_cmd_output("get dump4 ./dump4") + log.info("cephfs-shell output:\n{}".format(o)) + + # NOTE: cwd=None because we want to run it at CWD, not at cephfs mntpt. + o = self.mount_a.run_shell('cat dump4', cwd=None).stdout.getvalue(). \ + strip() + o_hash = crypt.crypt(o, '.A') + + # s_hash must be equal to o_hash + log.info("s_hash:{}".format(s_hash)) + log.info("o_hash:{}".format(o_hash)) + assert (s_hash == o_hash) + + # cleanup + self.mount_a.run_shell("rm dump4", cwd=None, check_status=False) + + def test_get_without_target_name(self): + """ + Test that get should fail when there is no target name + """ + s = 'Somedata' + # put - dump5 should pass + self.get_cephfs_shell_cmd_output("put - dump5", stdin=s) + + self.mount_a.stat('dump5') + + # get dump5 should fail as there is no local_path mentioned + with self.assertRaises(CommandFailedError): + self.get_cephfs_shell_cmd_output("get dump5") + + # stat dump would return non-zero exit code as get dump failed + # cwd=None because we want to run it at CWD, not at cephfs mntpt. + r = self.mount_a.run_shell('stat dump5', cwd=None, + check_status=False).returncode + self.assertEqual(r, 1) + + def test_get_doesnt_create_dir(self): + # if get cmd is creating subdirs on its own then dump7 will be + # stored as ./dump7/tmp/dump7 and not ./dump7, therefore + # if doing `cat ./dump7` returns non-zero exit code(i.e. 
1) then + # it implies that no such file exists at that location + dir_abspath = path.join(self.mount_a.mountpoint, 'tmp') + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.client_remote.write_file(path.join(dir_abspath, 'dump7'), + 'somedata') + self.get_cephfs_shell_cmd_output("get /tmp/dump7 ./dump7") + # test that dump7 exists + self.mount_a.run_shell("cat ./dump7", cwd=None) + + # cleanup + self.mount_a.run_shell(args='rm dump7', cwd=None, check_status=False) + + def test_get_to_console(self): + """ + Test that get passes with target name + """ + s = 'E' * 1024 + s_hash = crypt.crypt(s, '.A') + o = self.get_cephfs_shell_cmd_output("put - dump6", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + + # put - dump6 should pass + o = self.mount_a.stat('dump6') + log.info("mount_a output:\n{}".format(o)) + + # get dump6 - should pass + o = self.get_cephfs_shell_cmd_output("get dump6 -") + o_hash = crypt.crypt(o, '.A') + log.info("cephfs-shell output:\n{}".format(o)) + + # s_hash must be equal to o_hash + log.info("s_hash:{}".format(s_hash)) + log.info("o_hash:{}".format(o_hash)) + assert (s_hash == o_hash) + + + def test_put_without_target_name(self): + """ + put - should fail as the cmd expects both arguments are mandatory. + """ + with self.assertRaises(CommandFailedError): + self.get_cephfs_shell_cmd_output("put -") + + def test_put_validate_local_path(self): + """ + This test is intended to make sure local_path is validated before + trying to put the file from local fs to cephfs and the command + put ./dumpXYZ dump8 would fail as dumpXYX doesn't exist. + """ + with self.assertRaises(CommandFailedError): + o = self.get_cephfs_shell_cmd_output("put ./dumpXYZ dump8") + log.info("cephfs-shell output:\n{}".format(o)) + +class TestSnapshots(TestCephFSShell): + def test_snap(self): + """ + Test that snapshot creation and deletion work + """ + sd = self.fs.get_config('client_snapdir') + sdn = "data_dir/{}/snap1".format(sd) + + # create a data dir and dump some files into it + self.get_cephfs_shell_cmd_output("mkdir data_dir") + s = 'A' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_a", stdin=s) + s = 'B' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_b", stdin=s) + s = 'C' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_c", stdin=s) + s = 'D' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_d", stdin=s) + s = 'E' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_e", stdin=s) + + o = self.get_cephfs_shell_cmd_output("ls -l /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + + # create the snapshot - must pass + o = self.get_cephfs_shell_cmd_output("snap create snap1 /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + self.assertEqual("", o) + o = self.mount_a.stat(sdn) + log.info("mount_a output:\n{}".format(o)) + self.assertIn('st_mode', o) + + # create the same snapshot again - must fail with an error message + self.negtest_cephfs_shell_cmd(cmd="snap create snap1 /data_dir", + errmsg="snapshot 'snap1' already exists") + o = self.mount_a.stat(sdn) + log.info("mount_a output:\n{}".format(o)) + self.assertIn('st_mode', o) + + # delete the snapshot - must pass + o = self.get_cephfs_shell_cmd_output("snap delete snap1 /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + self.assertEqual("", o) + try: + o = self.mount_a.stat(sdn) + except CommandFailedError: + # snap dir should not exist anymore + pass + log.info("mount_a 
output:\n{}".format(o)) + self.assertNotIn('st_mode', o) + + # delete the same snapshot again - must fail with an error message + self.negtest_cephfs_shell_cmd(cmd="snap delete snap1 /data_dir", + errmsg="'snap1': no such snapshot") + try: + o = self.mount_a.stat(sdn) + except CommandFailedError: + pass + log.info("mount_a output:\n{}".format(o)) + self.assertNotIn('st_mode', o) + + +class TestCD(TestCephFSShell): + CLIENTS_REQUIRED = 1 + + def test_cd_with_no_args(self): + """ + Test that when cd is issued without any arguments, CWD is changed + to root directory. + """ + path = 'dir1/dir2/dir3' + self.mount_a.run_shell_payload(f"mkdir -p {path}") + expected_cwd = '/' + + script = 'cd {}\ncd\ncwd\n'.format(path) + output = self.get_cephfs_shell_script_output(script) + self.assertEqual(output, expected_cwd) + + def test_cd_with_args(self): + """ + Test that when cd is issued with an argument, CWD is changed + to the path passed in the argument. + """ + path = 'dir1/dir2/dir3' + self.mount_a.run_shell_payload(f"mkdir -p {path}") + expected_cwd = '/dir1/dir2/dir3' + + script = 'cd {}\ncwd\n'.format(path) + output = self.get_cephfs_shell_script_output(script) + self.assertEqual(output, expected_cwd) + + +class TestDU(TestCephFSShell): + CLIENTS_REQUIRED = 1 + + def test_du_works_for_regfiles(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + + size = humansize(self.mount_a.stat(regfile_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", regfilename) + + du_output = self.get_cephfs_shell_cmd_output('du ' + regfilename) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_non_empty_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + regfilename = 'some_regfile' + regfile_abspath = path.join(dir_abspath, regfilename) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + + # XXX: we stat `regfile_abspath` here because ceph du reports + # a non-empty + # directory's size as sum of sizes of all files under it. 
+ size = humansize(self.mount_a.stat(regfile_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", dirname) + + sleep(10) + du_output = self.get_cephfs_shell_cmd_output('du ' + dirname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_empty_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + + size = humansize(self.mount_a.stat(dir_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", dirname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + dirname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_hardlinks(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + hlinkname = 'some_hardlink' + hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname) + self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}") + + size = humansize(self.mount_a.stat(hlink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", hlinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + hlinkname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_softlinks_to_files(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + slinkname = 'some_softlink' + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + self.mount_a.run_shell_payload( + f"ln -s {regfile_abspath} {slink_abspath}") + + size = humansize(self.mount_a.lstat(slink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", slinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_softlinks_to_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + slinkname = 'some_softlink' + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink_abspath}") + + size = humansize(self.mount_a.lstat(slink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", slinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname) + self.assertRegex(du_output, expected_output) + + # NOTE: tests using these are pretty slow since to this methods sleeps for + # 15 seconds + def _setup_files(self, return_path_to_files=False, path_prefix='./'): + dirname = 'dir1' + regfilename = 'regfile' + hlinkname = 'hlink' + slinkname = 'slink1' + slink2name = 'slink2' + + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname) + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + slink2_abspath = path.join(self.mount_a.mountpoint, slink2name) + + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.run_shell_payload(f"touch {regfile_abspath}") + self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}") + self.mount_a.run_shell_payload( + f"ln -s {regfile_abspath} {slink_abspath}") + self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink2_abspath}") + + dir2_name = 'dir2' + dir21_name = 'dir21' + regfile121_name = 
'regfile121' + dir2_abspath = path.join(self.mount_a.mountpoint, dir2_name) + dir21_abspath = path.join(dir2_abspath, dir21_name) + regfile121_abspath = path.join(dir21_abspath, regfile121_name) + self.mount_a.run_shell_payload(f"mkdir -p {dir21_abspath}") + self.mount_a.run_shell_payload(f"touch {regfile121_abspath}") + + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + self.mount_a.client_remote.write_file(regfile121_abspath, + 'somemoredata') + + # TODO: is there a way to trigger/force update ceph.dir.rbytes? + # wait so that attr ceph.dir.rbytes gets a chance to be updated. + sleep(20) + + expected_patterns = [] + path_to_files = [] + + def append_expected_output_pattern(f): + if f == '/': + expected_patterns.append(r'{}{}{}'.format(size, " +", '.' + f)) + else: + expected_patterns.append(r'{}{}{}'.format( + size, " +", + path_prefix + path.relpath(f, self.mount_a.mountpoint))) + + for f in [dir_abspath, regfile_abspath, regfile121_abspath, + hlink_abspath, slink_abspath, slink2_abspath]: + size = humansize(self.mount_a.stat( + f, follow_symlinks=False)['st_size']) + append_expected_output_pattern(f) + + # get size for directories containig regfiles within + for f in [dir2_abspath, dir21_abspath]: + size = humansize(self.mount_a.stat(regfile121_abspath, + follow_symlinks=False)[ + 'st_size']) + append_expected_output_pattern(f) + + # get size for CephFS root + size = 0 + for f in [regfile_abspath, regfile121_abspath, slink_abspath, + slink2_abspath]: + size += self.mount_a.stat(f, follow_symlinks=False)['st_size'] + size = humansize(size) + append_expected_output_pattern('/') + + if return_path_to_files: + for p in [dir_abspath, regfile_abspath, dir2_abspath, + dir21_abspath, regfile121_abspath, hlink_abspath, + slink_abspath, slink2_abspath]: + path_to_files.append(path.relpath(p, self.mount_a.mountpoint)) + + return expected_patterns, path_to_files + else: + return expected_patterns + + def test_du_works_recursively_with_no_path_in_args(self): + expected_patterns_in_output = self._setup_files() + du_output = self.get_cephfs_shell_cmd_output('du -r') + + for expected_output in expected_patterns_in_output: + self.assertRegex(du_output, expected_output) + + def test_du_with_path_in_args(self): + expected_patterns_in_output, path_to_files = self._setup_files( + True, path_prefix='') + + args = ['du', '/'] + for p in path_to_files: + args.append(p) + du_output = self.get_cephfs_shell_cmd_output(args) + + for expected_output in expected_patterns_in_output: + self.assertRegex(du_output, expected_output) + + def test_du_with_no_args(self): + expected_patterns_in_output = self._setup_files() + + du_output = self.get_cephfs_shell_cmd_output('du') + + for expected_output in expected_patterns_in_output: + # Since CWD is CephFS root and being non-recursive expect only + # CWD in DU report. 
+ if expected_output.find('/') == len(expected_output) - 1: + self.assertRegex(du_output, expected_output) + + +class TestDF(TestCephFSShell): + def validate_df(self, filename): + df_output = self.get_cephfs_shell_cmd_output('df ' + filename) + log.info("cephfs-shell df output:\n{}".format(df_output)) + + shell_df = df_output.splitlines()[1].split() + + block_size = int(self.mount_a.df()["total"]) // 1024 + log.info("cephfs df block size output:{}\n".format(block_size)) + + st_size = int(self.mount_a.stat(filename)["st_size"]) + log.info("cephfs stat used output:{}".format(st_size)) + log.info("cephfs available:{}\n".format(block_size - st_size)) + + self.assertTupleEqual((block_size, st_size, block_size - st_size), + (int(shell_df[0]), int(shell_df[1]), + int(shell_df[2]))) + + def test_df_with_no_args(self): + expected_output = '' + df_output = self.get_cephfs_shell_cmd_output('df') + assert df_output == expected_output + + def test_df_for_valid_directory(self): + dir_name = 'dir1' + mount_output = self.mount_a.run_shell_payload(f"mkdir {dir_name}") + log.info("cephfs-shell mount output:\n{}".format(mount_output)) + self.validate_df(dir_name) + + def test_df_for_invalid_directory(self): + dir_abspath = path.join(self.mount_a.mountpoint, 'non-existent-dir') + self.negtest_cephfs_shell_cmd(cmd='df ' + dir_abspath, + errmsg='error in stat') + + def test_df_for_valid_file(self): + s = 'df test' * 14145016 + o = self.get_cephfs_shell_cmd_output("put - dumpfile", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + self.validate_df("dumpfile") + + +class TestQuota(TestCephFSShell): + dir_name = 'testdir' + + def create_dir(self): + mount_output = self.get_cephfs_shell_cmd_output( + 'mkdir ' + self.dir_name) + log.info("cephfs-shell mount output:\n{}".format(mount_output)) + + def set_and_get_quota_vals(self, input_val, check_status=True): + self.run_cephfs_shell_cmd(['quota', 'set', '--max_bytes', + input_val[0], '--max_files', input_val[1], + self.dir_name], check_status=check_status) + + quota_output = self.get_cephfs_shell_cmd_output( + ['quota', 'get', self.dir_name], + check_status=check_status) + + quota_output = quota_output.split() + return quota_output[1], quota_output[3] + + def test_set(self): + self.create_dir() + set_values = ('6', '2') + self.assertTupleEqual(self.set_and_get_quota_vals(set_values), + set_values) + + def test_replace_values(self): + self.test_set() + set_values = ('20', '4') + self.assertTupleEqual(self.set_and_get_quota_vals(set_values), + set_values) + + def test_set_invalid_dir(self): + set_values = ('5', '5') + try: + self.assertTupleEqual(self.set_and_get_quota_vals( + set_values, False), set_values) + raise Exception( + "Something went wrong!! Values set for non existing directory") + except IndexError: + # Test should pass as values cannot be set for non + # existing directory + pass + + def test_set_invalid_values(self): + self.create_dir() + set_values = ('-6', '-5') + try: + self.assertTupleEqual(self.set_and_get_quota_vals(set_values, + False), + set_values) + raise Exception("Something went wrong!! Invalid values set") + except IndexError: + # Test should pass as invalid values cannot be set + pass + + def test_exceed_file_limit(self): + self.test_set() + dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name) + self.mount_a.run_shell_payload(f"touch {dir_abspath}/file1") + file2 = path.join(dir_abspath, "file2") + try: + self.mount_a.run_shell_payload(f"touch {file2}") + raise Exception( + "Something went wrong!! 
File creation should have failed") + except CommandFailedError: + # Test should pass as file quota set to 2 + # Additional condition to confirm file creation failure + if not path.exists(file2): + return 0 + raise + + def test_exceed_write_limit(self): + self.test_set() + dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name) + filename = 'test_file' + file_abspath = path.join(dir_abspath, filename) + try: + # Write should fail as bytes quota is set to 6 + self.mount_a.client_remote.write_file(file_abspath, + 'Disk raise Exception') + raise Exception("Write should have failed") + except CommandFailedError: + # Test should pass only when write command fails + path_exists = path.exists(file_abspath) + if not path_exists: + # Testing with teuthology: No file is created. + return 0 + elif path_exists and not path.getsize(file_abspath): + # Testing on Fedora 30: When write fails, empty + # file gets created. + return 0 + else: + raise + + +class TestXattr(TestCephFSShell): + dir_name = 'testdir' + + def create_dir(self): + self.run_cephfs_shell_cmd('mkdir ' + self.dir_name) + + def set_get_list_xattr_vals(self, input_val, negtest=False): + setxattr_output = self.get_cephfs_shell_cmd_output( + ['setxattr', self.dir_name, input_val[0], input_val[1]]) + log.info("cephfs-shell setxattr output:\n{}".format(setxattr_output)) + + getxattr_output = self.get_cephfs_shell_cmd_output( + ['getxattr', self.dir_name, input_val[0]]) + log.info("cephfs-shell getxattr output:\n{}".format(getxattr_output)) + + listxattr_output = self.get_cephfs_shell_cmd_output( + ['listxattr', self.dir_name]) + log.info("cephfs-shell listxattr output:\n{}".format(listxattr_output)) + + return listxattr_output, getxattr_output + + def test_set(self): + self.create_dir() + set_values = ('user.key', '2') + self.assertTupleEqual(self.set_get_list_xattr_vals(set_values), + set_values) + + def test_reset(self): + self.test_set() + set_values = ('user.key', '4') + self.assertTupleEqual(self.set_get_list_xattr_vals(set_values), + set_values) + + def test_non_existing_dir(self): + input_val = ('user.key', '9') + self.negtest_cephfs_shell_cmd( + cmd=['setxattr', self.dir_name, input_val[0], + input_val[1]]) + self.negtest_cephfs_shell_cmd( + cmd=['getxattr', self.dir_name, input_val[0]]) + self.negtest_cephfs_shell_cmd(cmd=['listxattr', self.dir_name]) + + +class TestLS(TestCephFSShell): + dir_name = 'test_dir' + hidden_dir_name = '.test_hidden_dir' + + def test_ls(self): + """ Test that ls prints files in CWD. 
""" + self.run_cephfs_shell_cmd(f'mkdir {self.dir_name}') + + ls_output = self.get_cephfs_shell_cmd_output("ls") + log.info(f"output of ls command:\n{ls_output}") + + self.assertIn(self.dir_name, ls_output) + + def test_ls_a(self): + """ Test ls -a prints hidden files in CWD.""" + + self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}') + + ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a']) + log.info(f"output of ls -a command:\n{ls_a_output}") + + self.assertIn(self.hidden_dir_name, ls_a_output) + + def test_ls_does_not_print_hidden_dir(self): + """ Test ls command does not print hidden directory """ + + self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}') + + ls_output = self.get_cephfs_shell_cmd_output("ls") + log.info(f"output of ls command:\n{ls_output}") + + self.assertNotIn(self.hidden_dir_name, ls_output) + + def test_ls_a_prints_non_hidden_dir(self): + """ Test ls -a command prints non hidden directory """ + + self.run_cephfs_shell_cmd( + f'mkdir {self.hidden_dir_name} {self.dir_name}') + + ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a']) + log.info(f"output of ls -a command:\n{ls_a_output}") + + self.assertIn(self.dir_name, ls_a_output) + + def test_ls_H_prints_human_readable_file_size(self): + """ Test "ls -lH" prints human readable file size.""" + + file_sizes = ['1', '1K', '1M', '1G'] + file_names = ['dump1', 'dump2', 'dump3', 'dump4'] + + for (file_size, file_name) in zip(file_sizes, file_names): + temp_file = self.mount_a.client_remote.mktemp(file_name) + self.mount_a.run_shell_payload( + f"fallocate -l {file_size} {temp_file}") + self.mount_a.run_shell_payload(f'mv {temp_file} ./') + + ls_H_output = self.get_cephfs_shell_cmd_output(['ls', '-lH']) + + ls_H_file_size = set() + for line in ls_H_output.split('\n'): + ls_H_file_size.add(line.split()[1]) + + # test that file sizes are in human readable format + self.assertEqual({'1B', '1K', '1M', '1G'}, ls_H_file_size) + + def test_ls_s_sort_by_size(self): + """ Test "ls -S" sorts file listing by file_size """ + test_file1 = "test_file1.txt" + test_file2 = "test_file2.txt" + file1_content = 'A' * 102 + file2_content = 'B' * 10 + + self.run_cephfs_shell_cmd(f"write {test_file1}", stdin=file1_content) + self.run_cephfs_shell_cmd(f"write {test_file2}", stdin=file2_content) + + ls_s_output = self.get_cephfs_shell_cmd_output(['ls', '-lS']) + + file_sizes = [] + for line in ls_s_output.split('\n'): + file_sizes.append(line.split()[1]) + + # test that file size are in ascending order + self.assertEqual(file_sizes, sorted(file_sizes)) + + +class TestMisc(TestCephFSShell): + def test_issue_cephfs_shell_cmd_at_invocation(self): + """ + Test that `cephfs-shell -c conf cmd` works. + """ + # choosing a long name since short ones have a higher probability + # of getting matched by coincidence. + dirname = 'somedirectory' + self.run_cephfs_shell_cmd(['mkdir', dirname]) + + output = self.mount_a.client_remote.sh(['cephfs-shell', 'ls']). \ + strip() + + self.assertRegex(output, dirname) + + def test_help(self): + """ + Test that help outputs commands. + """ + o = self.get_cephfs_shell_cmd_output("help all") + log.info("output:\n{}".format(o)) + + + def test_chmod(self): + """Test chmod is allowed above o0777 """ + + test_file1 = "test_file2.txt" + file1_content = 'A' * 102 + self.run_cephfs_shell_cmd(f"write {test_file1}", stdin=file1_content) + self.run_cephfs_shell_cmd(f"chmod 01777 {test_file1}") + +class TestShellOpts(TestCephFSShell): + """ + Contains tests for shell options from conf file and shell prompt. 
+ """ + + def setUp(self): + super(type(self), self).setUp() + + # output of following command - + # editor - was: 'vim' + # now: '?' + # editor: '?' + self.editor_val = self.get_cephfs_shell_cmd_output( + 'set editor ?, set editor').split('\n')[2] + self.editor_val = self.editor_val.split(':')[1]. \ + replace("'", "", 2).strip() + + def write_tempconf(self, confcontents): + self.tempconfpath = self.mount_a.client_remote.mktemp( + suffix='cephfs-shell.conf') + self.mount_a.client_remote.write_file(self.tempconfpath, + confcontents) + + def test_reading_conf(self): + self.write_tempconf("[cephfs-shell]\neditor = ???") + + # output of following command - + # CephFS:~/>>> set editor + # editor: 'vim' + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor', shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertNotEqual(self.editor_val, final_editor_val) + + def test_reading_conf_with_dup_opt(self): + """ + Read conf without duplicate sections/options. + """ + self.write_tempconf("[cephfs-shell]\neditor = ???\neditor = " + + self.editor_val) + + # output of following command - + # CephFS:~/>>> set editor + # editor: 'vim' + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor', shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertEqual(self.editor_val, final_editor_val) + + def test_setting_opt_after_reading_conf(self): + self.write_tempconf("[cephfs-shell]\neditor = ???") + + # output of following command - + # editor - was: vim + # now: vim + # editor: vim + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor %s, set editor' % self.editor_val, + shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split('\n')[2] + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertEqual(self.editor_val, final_editor_val) diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py new file mode 100644 index 000000000..c4215df33 --- /dev/null +++ b/qa/tasks/cephfs/test_client_limits.py @@ -0,0 +1,397 @@ + +""" +Exercise the MDS's behaviour when clients and the MDCache reach or +exceed the limits of how many caps/inodes they should hold. +""" + +import logging +from textwrap import dedent +from tasks.ceph_test_case import TestTimeoutError +from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError +import os +from io import StringIO + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + +# Hardcoded values from Server::recall_client_state +CAP_RECALL_RATIO = 0.8 +CAP_RECALL_MIN = 100 + + +class TestClientLimits(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + def _test_client_pin(self, use_subdir, open_files): + """ + When a client pins an inode in its cache, for example because the file is held open, + it should reject requests from the MDS to trim these caps. The MDS should complain + to the user that it is unable to enforce its cache size limits because of this + objectionable client. 
+ + :param use_subdir: whether to put test files in a subdir or use root + """ + + # Set MDS cache memory limit to a low value that will make the MDS to + # ask the client to trim the caps. + cache_memory_limit = "1K" + + self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit) + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client) + mds_max_caps_per_client = int(self.config_get('mds', "mds_max_caps_per_client")) + mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) + + mount_a_client_id = self.mount_a.get_global_id() + path = "subdir" if use_subdir else "." + open_proc = self.mount_a.open_n_background(path, open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + (2 if use_subdir else 1), + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # MDS should not be happy about that, as the client is failing to comply + # with the SESSION_RECALL messages it is being sent + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) + + # We can also test that the MDS health warning for oversized + # cache is functioning as intended. + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) + + # When the client closes the files, it should retain only as many caps as allowed + # under the SESSION_RECALL policy + log.info("Terminating process holding files open") + self.mount_a._kill_background(open_proc) + + # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message, + # which depend on the caps outstanding, cache size and overall ratio + def expected_caps(): + num_caps = self.get_session(mount_a_client_id)['num_caps'] + if num_caps <= mds_min_caps_per_client: + return True + elif num_caps <= mds_max_caps_per_client: + return True + else: + return False + + self.wait_until_true(expected_caps, timeout=60) + + @needs_trimming + def test_client_pin_root(self): + self._test_client_pin(False, 400) + + @needs_trimming + def test_client_pin(self): + self._test_client_pin(True, 800) + + @needs_trimming + def test_client_pin_mincaps(self): + self._test_client_pin(True, 200) + + def test_client_min_caps_working_set(self): + """ + When a client has inodes pinned in its cache (open files), that the MDS + will not warn about the client not responding to cache pressure when + the number of caps is below mds_min_caps_working_set. + """ + + # Set MDS cache memory limit to a low value that will make the MDS to + # ask the client to trim the caps. 
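The per-session cap count polled above via get_session()/wait_until_equal is also visible outside teuthology through the MDS tell interface. A rough sketch, assuming rank 0 can be addressed as mds.0 (a daemon name works too) and the usual 'id'/'num_caps' fields in the session ls JSON:

    import json
    import subprocess
    import time

    def wait_for_num_caps(client_id, expected, timeout=600, interval=5):
        # poll `ceph tell mds.0 session ls` until the client holds the expected caps
        deadline = time.time() + timeout
        while time.time() < deadline:
            out = subprocess.check_output(['ceph', 'tell', 'mds.0', 'session', 'ls'])
            for session in json.loads(out):
                if session.get('id') == client_id and session.get('num_caps') == expected:
                    return session
            time.sleep(interval)
        raise TimeoutError('client %s never reached %s caps' % (client_id, expected))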
+ cache_memory_limit = "1K" + open_files = 400 + + self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit) + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) + self.config_set('mds', 'mds_min_caps_working_set', open_files*2) + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) + + mount_a_client_id = self.mount_a.get_global_id() + self.mount_a.open_n_background("subdir", open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + 2, + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # We can also test that the MDS health warning for oversized + # cache is functioning as intended. + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) + + try: + # MDS should not be happy about that but it's not sending + # MDS_CLIENT_RECALL warnings because the client's caps are below + # mds_min_caps_working_set. + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) + except TestTimeoutError: + pass + else: + raise RuntimeError("expected no client recall warning") + + def test_cap_acquisition_throttle_readdir(self): + """ + Mostly readdir acquires caps faster than the mds recalls, so the cap + acquisition via readdir is throttled by retrying the readdir after + a fraction of second (0.5) by default when throttling condition is met. + """ + + subdir_count = 4 + files_per_dir = 25 + + # throttle in a way so that two dir reads are already hitting it. + throttle_value = (files_per_dir * 3) // 2 + + # activate throttling logic by setting max per client to a low value + self.config_set('mds', 'mds_max_caps_per_client', 1) + self.config_set('mds', 'mds_session_cap_acquisition_throttle', throttle_value) + + # Create files split across {subdir_count} directories, {per_dir_count} in each dir + for i in range(1, subdir_count+1): + self.mount_a.create_n_files("dir{0}/file".format(i), files_per_dir, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + + # recursive readdir. macOs wants an explicit directory for `find`. + proc = self.mount_a.run_shell_payload("find . | wc", stderr=StringIO()) + # return code may be None if the command got interrupted + self.assertTrue(proc.returncode is None or proc.returncode == 0, proc.stderr.getvalue()) + + # validate the throttle condition to be hit atleast once + cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle'] + self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1) + + # validate cap_acquisition decay counter after readdir to NOT exceed the throttle value + # plus one batch that could have been taken immediately before querying + # assuming the batch is equal to the per dir file count. 
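The cap_acquisition_throttle counter checked above sits in the mds_server section of the MDS perf dump, so the same number can be read straight off the admin socket. Sketch only, with the socket path as an assumption:

    import json
    import subprocess

    def cap_acquisition_throttle_hits(asok='/var/run/ceph/ceph-mds.a.asok'):
        # `perf dump` over the admin socket returns the counters self.perf_dump() wraps
        out = subprocess.check_output(['ceph', '--admin-daemon', asok, 'perf', 'dump'])
        return json.loads(out)['mds_server']['cap_acquisition_throttle']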
+ cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value'] + self.assertLessEqual(cap_acquisition_value, files_per_dir + throttle_value) + + # make sure that the throttle was reported in the events + def historic_ops_have_event(expected_event): + ops_dump = self.fs.rank_tell(['dump_historic_ops']) + # reverse the events and the ops assuming that later ops would be throttled + for op in reversed(ops_dump['ops']): + for ev in reversed(op.get('type_data', {}).get('events', [])): + if ev['event'] == expected_event: + return True + return False + + self.assertTrue(historic_ops_have_event('cap_acquisition_throttle')) + + def test_client_release_bug(self): + """ + When a client has a bug (which we will simulate) preventing it from releasing caps, + the MDS should notice that releases are not being sent promptly, and generate a health + metric to that effect. + """ + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + mount_a_client_id = self.mount_a.get_global_id() + + # Client A creates a file. He will hold the write caps on the file, and later (simulated bug) fail + # to comply with the MDSs request to release that cap + self.mount_a.run_shell(["touch", "file1"]) + + # Client B tries to stat the file that client A created + rproc = self.mount_b.write_background("file1") + + # After session_timeout, we should see a health warning (extra lag from + # MDS beacon period) + session_timeout = self.fs.get_var("session_timeout") + self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10) + + # Client B should still be stuck + self.assertFalse(rproc.finished) + + # Kill client A + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Client B should complete + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + rproc.wait() + + def test_client_blocklisted_oldest_tid(self): + """ + that a client is blocklisted when its encoded session metadata exceeds the + configured threshold (due to ever growing `completed_requests` caused due + to an unidentified bug (in the client or the MDS)). + """ + + # num of requests client issues + max_requests = 10000 + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.config_set('client', 'client inject fixed oldest tid', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + + self.config_set('mds', 'mds_max_completed_requests', max_requests); + + # Create lots of files + self.mount_a.create_n_files("testdir/file1", max_requests + 100) + + # Create a few files synchronously. This makes sure previous requests are completed + self.mount_a.create_n_files("testdir/file2", 5, True) + + # Wait for the health warnings. Assume mds can handle 10 request per second at least + self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id)) + + # set the threshold low so that it has a high probability of + # hitting. + self.config_set('mds', 'mds_session_metadata_threshold', 5000); + + # Create lot many files synchronously. This would hit the session metadata threshold + # causing the client to get blocklisted. 
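The blocklisting asserted just below can also be confirmed from the CLI with `ceph osd blocklist ls`. A sketch; the exact address format (v1:/v2: prefixes, nonce) varies between releases:

    import subprocess

    def is_blocklisted(addr):
        # one "addr ... expires ..." entry per line of output
        out = subprocess.check_output(['ceph', 'osd', 'blocklist', 'ls']).decode()
        return any(addr in line for line in out.splitlines())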
+ with self.assertRaises(CommandFailedError): + self.mount_a.create_n_files("testdir/file2", 100000, True) + + self.mds_cluster.is_addr_blocklisted(self.mount_a.get_global_addr()) + # the mds should bump up the relevant perf counter + pd = self.perf_dump() + self.assertGreater(pd['mds_sessions']['mdthresh_evicted'], 0) + + # reset the config + self.config_set('client', 'client inject fixed oldest tid', 'false') + + self.mount_a.kill_cleanup() + self.mount_a.mount_wait() + + def test_client_oldest_tid(self): + """ + When a client does not advance its oldest tid, the MDS should notice that + and generate health warnings. + """ + + # num of requests client issues + max_requests = 1000 + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.set_conf('client', 'client inject fixed oldest tid', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + + self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)]) + + # Create lots of files + self.mount_a.create_n_files("testdir/file1", max_requests + 100) + + # Create a few files synchronously. This makes sure previous requests are completed + self.mount_a.create_n_files("testdir/file2", 5, True) + + # Wait for the health warnings. Assume mds can handle 10 request per second at least + self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10) + + def _test_client_cache_size(self, mount_subdir): + """ + check if client invalidate kernel dcache according to its cache size config + """ + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + if mount_subdir: + # fuse assigns a fix inode number (1) to root inode. But in mounting into + # subdir case, the actual inode number of root is not 1. This mismatch + # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries + # in root directory. + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.umount_wait() + self.set_conf('client', 'client mountpoint', '/subdir') + self.mount_a.mount_wait() + root_ino = self.mount_a.path_to_ino(".") + self.assertEqual(root_ino, 1); + + dir_path = os.path.join(self.mount_a.mountpoint, "testdir") + + mkdir_script = dedent(""" + import os + os.mkdir("{path}") + for n in range(0, {num_dirs}): + os.mkdir("{path}/dir{{0}}".format(n)) + """) + + num_dirs = 1000 + self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs)) + self.mount_a.run_shell(["sync"]) + + dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() + self.assertGreaterEqual(dentry_count, num_dirs) + self.assertGreaterEqual(dentry_pinned_count, num_dirs) + + cache_size = num_dirs // 10 + self.mount_a.set_cache_size(cache_size) + + def trimmed(): + dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() + log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format( + dentry_count, dentry_pinned_count + )) + if dentry_count > cache_size or dentry_pinned_count > cache_size: + return False + + return True + + self.wait_until_true(trimmed, 30) + + @needs_trimming + def test_client_cache_size(self): + self._test_client_cache_size(False) + self._test_client_cache_size(True) + + def test_client_max_caps(self): + """ + That the MDS will not let a client sit above mds_max_caps_per_client caps. 
+ """ + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + mds_max_caps_per_client = 2*mds_min_caps_per_client + self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client) + + self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + def expected_caps(): + num_caps = self.get_session(mount_a_client_id)['num_caps'] + if num_caps <= mds_max_caps_per_client: + return True + else: + return False + + self.wait_until_true(expected_caps, timeout=60) diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py new file mode 100644 index 000000000..1bd6884a9 --- /dev/null +++ b/qa/tasks/cephfs/test_client_recovery.py @@ -0,0 +1,757 @@ + +""" +Teuthology task for exercising CephFS client recovery +""" + +import logging +from textwrap import dedent +import time +import distutils.version as version +import random +import re +import string +import os + +from teuthology.orchestra import run +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.packaging import get_package_version + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestClientNetworkRecovery(CephFSTestCase): + REQUIRE_ONE_CLIENT_REMOTE = True + CLIENTS_REQUIRED = 2 + + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] + + # Environment references + mds_reconnect_timeout = None + ms_max_backoff = None + + def test_network_death(self): + """ + Simulate software freeze or temporary network failure. + + Check that the client blocks I/O during failure, and completes + I/O after failure. 
+ """ + + session_timeout = self.fs.get_var("session_timeout") + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + + # We only need one client + self.mount_b.umount_wait() + + # Initially our one client session should be visible + client_id = self.mount_a.get_global_id() + ls_data = self._session_list() + self.assert_session_count(1, ls_data) + self.assertEqual(ls_data[0]['id'], client_id) + self.assert_session_state(client_id, "open") + + # ...and capable of doing I/O without blocking + self.mount_a.create_files() + + # ...but if we turn off the network + self.fs.set_clients_block(True) + + # ...and try and start an I/O + write_blocked = self.mount_a.write_background() + + # ...then it should block + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "stale") + + # ...until we re-enable I/O + self.fs.set_clients_block(False) + + # ...when it should complete promptly + a = time.time() + self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2) + write_blocked.wait() # Already know we're finished, wait() to raise exception on errors + recovery_time = time.time() - a + log.info("recovery time: {0}".format(recovery_time)) + self.assert_session_state(client_id, "open") + + +class TestClientRecovery(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] + + # Environment references + mds_reconnect_timeout = None + ms_max_backoff = None + + def test_basic(self): + # Check that two clients come up healthy and see each others' files + # ===================================================== + self.mount_a.create_files() + self.mount_a.check_files() + self.mount_a.umount_wait() + + self.mount_b.check_files() + + self.mount_a.mount_wait() + + # Check that the admin socket interface is correctly reporting + # two sessions + # ===================================================== + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + self.assertSetEqual( + set([l['id'] for l in ls_data]), + {self.mount_a.get_global_id(), self.mount_b.get_global_id()} + ) + + def test_restart(self): + # Check that after an MDS restart both clients reconnect and continue + # to handle I/O + # ===================================================== + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_a.create_destroy() + self.mount_b.create_destroy() + + def _session_num_caps(self, client_id): + ls_data = self.fs.mds_asok(['session', 'ls']) + return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps']) + + def test_reconnect_timeout(self): + # Reconnect timeout + # ================= + # Check that if I stop an MDS and a client goes away, the MDS waits + # for the reconnect period + + mount_a_client_id = self.mount_a.get_global_id() + + self.fs.fail() + + self.mount_a.umount_wait(force=True) + + self.fs.set_joinable() + + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + # Check that the MDS locally reports its state correctly + status = self.fs.mds_asok(['status']) + self.assertIn("reconnect_status", status) + + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + # The session for the dead client should have the 'reconnect' flag set + 
self.assertTrue(self.get_session(mount_a_client_id)['reconnecting']) + + # Wait for the reconnect state to clear, this should take the + # reconnect timeout period. + in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2) + # Check that the period we waited to enter active is within a factor + # of two of the reconnect timeout. + self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout // 2, + "Should have been in reconnect phase for {0} but only took {1}".format( + self.mds_reconnect_timeout, in_reconnect_for + )) + + self.assert_session_count(1) + + # Check that the client that timed out during reconnect can + # mount again and do I/O + self.mount_a.mount_wait() + self.mount_a.create_destroy() + + self.assert_session_count(2) + + def test_reconnect_eviction(self): + # Eviction during reconnect + # ========================= + mount_a_client_id = self.mount_a.get_global_id() + + self.fs.fail() + + # The mount goes away while the MDS is offline + self.mount_a.kill() + + # wait for it to die + time.sleep(5) + + self.fs.set_joinable() + + # Enter reconnect phase + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + self.assert_session_count(2) + + # Evict the stuck client + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + self.assert_session_count(1) + + # Observe that we proceed to active phase without waiting full reconnect timeout + evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + # Once we evict the troublemaker, the reconnect phase should complete + # in well under the reconnect timeout. + self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5, + "reconnect did not complete soon enough after eviction, took {0}".format( + evict_til_active + )) + + # We killed earlier so must clean up before trying to use again + self.mount_a.kill_cleanup() + + # Bring the client back + self.mount_a.mount_wait() + self.mount_a.create_destroy() + + def _test_stale_caps(self, write): + session_timeout = self.fs.get_var("session_timeout") + + # Capability release from stale session + # ===================================== + if write: + content = ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)) + cap_holder = self.mount_a.open_background(content=content) + else: + content = '' + self.mount_a.run_shell(["touch", "background_file"]) + self.mount_a.umount_wait() + self.mount_a.mount_wait() + cap_holder = self.mount_a.open_background(write=False) + + self.assert_session_count(2) + mount_a_gid = self.mount_a.get_global_id() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible(size=len(content)) + + # Simulate client death + self.mount_a.suspend_netns() + + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + + try: + # Now, after session_timeout seconds, the waiter should + # complete their operation when the MDS marks the holder's + # session stale. 
+ cap_waiter = self.mount_b.write_background() + a = time.time() + cap_waiter.wait() + b = time.time() + + # Should have succeeded + self.assertEqual(cap_waiter.exitstatus, 0) + + if write: + self.assert_session_count(1) + else: + self.assert_session_state(mount_a_gid, "stale") + + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0, + "Capability handover took {0}, expected approx {1}".format( + cap_waited, session_timeout + )) + finally: + self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable + self.mount_a._kill_background(cap_holder) + + def test_stale_read_caps(self): + self._test_stale_caps(False) + + def test_stale_write_caps(self): + self._test_stale_caps(True) + + def test_evicted_caps(self): + # Eviction while holding a capability + # =================================== + + session_timeout = self.fs.get_var("session_timeout") + + # Take out a write capability on a file on client A, + # and then immediately kill it. + cap_holder = self.mount_a.open_background() + mount_a_client_id = self.mount_a.get_global_id() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.suspend_netns() + + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + + try: + # The waiter should get stuck waiting for the capability + # held on the MDS by the now-dead client A + cap_waiter = self.mount_b.write_background() + time.sleep(5) + self.assertFalse(cap_waiter.finished) + + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + # Now, because I evicted the old holder of the capability, it should + # immediately get handed over to the waiter + a = time.time() + cap_waiter.wait() + b = time.time() + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + # This is the check that it happened 'now' rather than waiting + # for the session timeout + self.assertLess(cap_waited, session_timeout / 2.0, + "Capability handover took {0}, expected less than {1}".format( + cap_waited, session_timeout / 2.0 + )) + + finally: + self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable + self.mount_a._kill_background(cap_holder) + + def test_trim_caps(self): + # Trim capability when reconnecting MDS + # =================================== + + count = 500 + # Create lots of files + for i in range(count): + self.mount_a.run_shell(["touch", "f{0}".format(i)]) + + # Populate mount_b's cache + self.mount_b.run_shell(["ls", "-l"]) + + client_id = self.mount_b.get_global_id() + num_caps = self._session_num_caps(client_id) + self.assertGreaterEqual(num_caps, count) + + # Restart MDS. 
client should trim its cache when reconnecting to the MDS + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + num_caps = self._session_num_caps(client_id) + self.assertLess(num_caps, count, + "should have less than {0} capabilities, have {1}".format( + count, num_caps + )) + + def _is_flockable(self): + a_version_str = get_package_version(self.mount_a.client_remote, "fuse") + b_version_str = get_package_version(self.mount_b.client_remote, "fuse") + flock_version_str = "2.9" + + version_regex = re.compile(r"[0-9\.]+") + a_result = version_regex.match(a_version_str) + self.assertTrue(a_result) + b_result = version_regex.match(b_version_str) + self.assertTrue(b_result) + a_version = version.StrictVersion(a_result.group()) + b_version = version.StrictVersion(b_result.group()) + flock_version=version.StrictVersion(flock_version_str) + + if (a_version >= flock_version and b_version >= flock_version): + log.info("flock locks are available") + return True + else: + log.info("not testing flock locks, machines have versions {av} and {bv}".format( + av=a_version_str,bv=b_version_str)) + return False + + def test_filelock(self): + """ + Check that file lock doesn't get lost after an MDS restart + """ + + flockable = self._is_flockable() + lock_holder = self.mount_a.lock_background(do_flock=flockable) + + self.mount_b.wait_for_visible("background_file-2") + self.mount_b.check_filelock(do_flock=flockable) + + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_b.check_filelock(do_flock=flockable) + + self.mount_a._kill_background(lock_holder) + + def test_filelock_eviction(self): + """ + Check that file lock held by evicted client is given to + waiting client. + """ + if not self._is_flockable(): + self.skipTest("flock is not available") + + lock_holder = self.mount_a.lock_background() + self.mount_b.wait_for_visible("background_file-2") + self.mount_b.check_filelock() + + lock_taker = self.mount_b.lock_and_release() + # Check the taker is waiting (doesn't get it immediately) + time.sleep(2) + self.assertFalse(lock_holder.finished) + self.assertFalse(lock_taker.finished) + + try: + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + # Evicting mount_a should let mount_b's attempt to take the lock + # succeed + self.wait_until_true(lambda: lock_taker.finished, timeout=10) + finally: + self.mount_a._kill_background(lock_holder) + + # teardown() doesn't quite handle this case cleanly, so help it out + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Bring the client back + self.mount_a.mount_wait() + + def test_dir_fsync(self): + self._test_fsync(True); + + def test_create_fsync(self): + self._test_fsync(False); + + def _test_fsync(self, dirfsync): + """ + That calls to fsync guarantee visibility of metadata to another + client immediately after the fsyncing client dies. 
+ """ + + # Leave this guy out until he's needed + self.mount_b.umount_wait() + + # Create dir + child dentry on client A, and fsync the dir + path = os.path.join(self.mount_a.mountpoint, "subdir") + self.mount_a.run_python( + dedent(""" + import os + import time + + path = "{path}" + + print("Starting creation...") + start = time.time() + + os.mkdir(path) + dfd = os.open(path, os.O_DIRECTORY) + + fd = open(os.path.join(path, "childfile"), "w") + print("Finished creation in {{0}}s".format(time.time() - start)) + + print("Starting fsync...") + start = time.time() + if {dirfsync}: + os.fsync(dfd) + else: + os.fsync(fd) + print("Finished fsync in {{0}}s".format(time.time() - start)) + """.format(path=path,dirfsync=str(dirfsync))) + ) + + # Immediately kill the MDS and then client A + self.fs.fail() + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay + self.fs.set_joinable() + log.info("Waiting for reconnect...") + self.fs.wait_for_state("up:reconnect") + log.info("Waiting for active...") + self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout) + log.info("Reached active...") + + # Is the child dentry visible from mount B? + self.mount_b.mount_wait() + self.mount_b.run_shell(["ls", "subdir/childfile"]) + + def test_unmount_for_evicted_client(self): + """Test if client hangs on unmount after evicting the client.""" + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + self.mount_a.umount_wait(require_clean=True, timeout=30) + + def test_mount_after_evicted_client(self): + """Test if a new mount of same fs works after client eviction.""" + + # trash this : we need it to use same remote as mount_a + self.mount_b.umount_wait() + + cl = self.mount_a.__class__ + + # create a new instance of mount_a's class with most of the + # same settings, but mounted on mount_b's mountpoint. 
+ m = cl(ctx=self.mount_a.ctx, + client_config=self.mount_a.client_config, + test_dir=self.mount_a.test_dir, + client_id=self.mount_a.client_id, + client_remote=self.mount_a.client_remote, + client_keyring_path=self.mount_a.client_keyring_path, + cephfs_name=self.mount_a.cephfs_name, + cephfs_mntpt= self.mount_a.cephfs_mntpt, + hostfs_mntpt=self.mount_b.hostfs_mntpt, + brxnet=self.mount_a.ceph_brx_net) + + # evict mount_a + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + m.mount_wait() + m.create_files() + m.check_files() + m.umount_wait(require_clean=True) + + def test_stale_renew(self): + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to handle signal STOP/CONT") + + session_timeout = self.fs.get_var("session_timeout") + + self.mount_a.run_shell(["mkdir", "testdir"]) + self.mount_a.run_shell(["touch", "testdir/file1"]) + # populate readdir cache + self.mount_a.run_shell(["ls", "testdir"]) + self.mount_b.run_shell(["ls", "testdir"]) + + # check if readdir cache is effective + initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency']) + self.mount_b.run_shell(["ls", "testdir"]) + current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency']) + self.assertEqual(current_readdirs, initial_readdirs); + + mount_b_gid = self.mount_b.get_global_id() + # stop ceph-fuse process of mount_b + self.mount_b.suspend_netns() + + self.assert_session_state(mount_b_gid, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + + self.mount_a.run_shell(["touch", "testdir/file2"]) + self.assert_session_state(mount_b_gid, "stale") + + # resume ceph-fuse process of mount_b + self.mount_b.resume_netns() + # Is the new file visible from mount_b? (caps become invalid after session stale) + self.mount_b.run_shell(["ls", "testdir/file2"]) + + def test_abort_conn(self): + """ + Check that abort_conn() skips closing mds sessions. + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Testing libcephfs function") + + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + session_timeout = self.fs.get_var("session_timeout") + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + gid_str = self.mount_a.run_python(dedent(""" + import cephfs as libcephfs + cephfs = libcephfs.LibCephFS(conffile='') + cephfs.mount() + client_id = cephfs.get_instance_id() + cephfs.abort_conn() + print(client_id) + """) + ) + gid = int(gid_str); + + self.assert_session_state(gid, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + self.assert_session_state(gid, "stale") + + def test_dont_mark_unresponsive_client_stale(self): + """ + Test that an unresponsive client holding caps is not marked stale or + evicted unless another clients wants its caps. + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to handle signal STOP/CONT") + + # XXX: To conduct this test we need at least two clients since a + # single client is never evcited by MDS. 
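send_signal('sigstop')/('sigcont') used just below freezes the ceph-fuse process to fake an unresponsive client; at the OS level this is nothing more than SIGSTOP/SIGCONT on the client pid (the pid argument is whatever your mount helper reports):

    import os
    import signal
    import time

    def freeze_process(pid, seconds):
        # stop the process, hold it long enough for the MDS to notice, then resume it
        os.kill(pid, signal.SIGSTOP)
        try:
            time.sleep(seconds)
        finally:
            os.kill(pid, signal.SIGCONT)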
+ SESSION_TIMEOUT = 30 + SESSION_AUTOCLOSE = 50 + time_at_beg = time.time() + mount_a_gid = self.mount_a.get_global_id() + _ = self.mount_a.client_pid + self.fs.set_var('session_timeout', SESSION_TIMEOUT) + self.fs.set_var('session_autoclose', SESSION_AUTOCLOSE) + self.assert_session_count(2, self.fs.mds_asok(['session', 'ls'])) + + # test that client holding cap not required by any other client is not + # marked stale when it becomes unresponsive. + self.mount_a.run_shell(['mkdir', 'dir']) + self.mount_a.send_signal('sigstop') + time.sleep(SESSION_TIMEOUT + 2) + self.assert_session_state(mount_a_gid, "open") + + # test that other clients have to wait to get the caps from + # unresponsive client until session_autoclose. + self.mount_b.run_shell(['stat', 'dir']) + self.assert_session_count(1, self.fs.mds_asok(['session', 'ls'])) + self.assertLess(time.time(), time_at_beg + SESSION_AUTOCLOSE) + + self.mount_a.send_signal('sigcont') + + def test_config_session_timeout(self): + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + session_timeout = self.fs.get_var("session_timeout") + mount_a_gid = self.mount_a.get_global_id() + + self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)]) + + self.mount_a.kill(); + + self.assert_session_count(2) + + time.sleep(session_timeout * 1.5) + self.assert_session_state(mount_a_gid, "open") + + time.sleep(session_timeout) + self.assert_session_count(1) + + self.mount_a.kill_cleanup() + + def test_reconnect_after_blocklisted(self): + """ + Test reconnect after blocklisted. + - writing to a fd that was opened before blocklist should return -EBADF + - reading/writing to a file with lost file locks should return -EIO + - readonly fd should continue to work + """ + + self.mount_a.umount_wait() + + if isinstance(self.mount_a, FuseMount): + self.mount_a.mount_wait(mntargs=['--client_reconnect_stale=1', '--fuse_disable_pagecache=1']) + else: + try: + self.mount_a.mount_wait(mntopts=['recover_session=clean']) + except CommandFailedError: + self.mount_a.kill_cleanup() + self.skipTest("Not implemented in current kernel") + + self.mount_a.wait_until_mounted() + + path = os.path.join(self.mount_a.mountpoint, 'testfile_reconnect_after_blocklisted') + pyscript = dedent(""" + import os + import sys + import fcntl + import errno + import time + + fd1 = os.open("{path}.1", os.O_RDWR | os.O_CREAT, 0O666) + fd2 = os.open("{path}.1", os.O_RDONLY) + fd3 = os.open("{path}.2", os.O_RDWR | os.O_CREAT, 0O666) + fd4 = os.open("{path}.2", os.O_RDONLY) + + os.write(fd1, b'content') + os.read(fd2, 1); + + os.write(fd3, b'content') + os.read(fd4, 1); + fcntl.flock(fd4, fcntl.LOCK_SH | fcntl.LOCK_NB) + + print("blocklist") + sys.stdout.flush() + + sys.stdin.readline() + + # wait for mds to close session + time.sleep(10); + + # trigger 'open session' message. 
kclient relies on 'session reject' message + # to detect if itself is blocklisted + try: + os.stat("{path}.1") + except: + pass + + # wait for auto reconnect + time.sleep(10); + + try: + os.write(fd1, b'content') + except OSError as e: + if e.errno != errno.EBADF: + raise + else: + raise RuntimeError("write() failed to raise error") + + os.read(fd2, 1); + + try: + os.read(fd4, 1) + except OSError as e: + if e.errno != errno.EIO: + raise + else: + raise RuntimeError("read() failed to raise error") + """).format(path=path) + rproc = self.mount_a.client_remote.run( + args=['python3', '-c', pyscript], + wait=False, stdin=run.PIPE, stdout=run.PIPE) + + rproc.stdout.readline() + + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + rproc.stdin.writelines(['done\n']) + rproc.stdin.flush() + + rproc.wait() + self.assertEqual(rproc.exitstatus, 0) + + def test_refuse_client_session(self): + """ + Test that client cannot start session when file system flag + refuse_client_session is set + """ + + self.mount_a.umount_wait() + self.fs.set_refuse_client_session(True) + with self.assertRaises(CommandFailedError): + self.mount_a.mount_wait() + + def test_refuse_client_session_on_reconnect(self): + """ + Test that client cannot reconnect when filesystem comes online and + file system flag refuse_client_session is set + """ + + self.mount_a.create_files() + self.mount_a.check_files() + + self.fs.fail() + self.fs.set_refuse_client_session(True) + self.fs.set_joinable() + with self.assert_cluster_log('client could not reconnect as' + ' file system flag' + ' refuse_client_session is set'): + time.sleep(self.fs.get_var("session_timeout") * 1.5) + self.assertEqual(len(self.fs.mds_tell(["session", "ls"])), 0) + self.mount_a.umount_wait(force=True) + diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py new file mode 100644 index 000000000..bfaa23453 --- /dev/null +++ b/qa/tasks/cephfs/test_damage.py @@ -0,0 +1,663 @@ +from io import BytesIO, StringIO +import json +import logging +import errno +import re +import time +from teuthology.contextutil import MaxWhileTries +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import wait +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +DAMAGED_ON_START = "damaged_on_start" +DAMAGED_ON_LS = "damaged_on_ls" +CRASHED = "server crashed" +NO_DAMAGE = "no damage" +READONLY = "readonly" +FAILED_CLIENT = "client failed" +FAILED_SERVER = "server failed" + +# An EIO in response to a stat from the client +EIO_ON_LS = "eio" + +# An EIO, but nothing in damage table (not ever what we expect) +EIO_NO_DAMAGE = "eio without damage entry" + + +log = logging.getLogger(__name__) + + +class TestDamage(CephFSTestCase): + def _simple_workload_write(self): + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.write_n_mb("subdir/sixmegs", 6) + return self.mount_a.stat("subdir/sixmegs") + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + @for_teuthology #459s + def test_object_deletion(self): + """ + That the MDS has a clean 'damaged' response to loss of any single metadata object + """ + + self._simple_workload_write() + + # Hmm, actually it would be nice to permute whether the metadata pool + # state contains sessions or not, but for the moment close this session + # to avoid waiting through reconnect on every MDS start. 
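The radosmo(['export', '-'])/radosm(['import', '-']) round trip used just below snapshots and restores the whole metadata pool between mutations. Roughly equivalent rados CLI usage, with the pool name as an assumption (check `ceph fs ls` for the real one):

    import subprocess

    METADATA_POOL = 'cephfs_metadata'  # assumption

    def snapshot_pool(pool=METADATA_POOL):
        # serialize every object in the pool to a blob on stdout
        return subprocess.check_output(['rados', '-p', pool, 'export', '-'])

    def restore_pool(blob, pool=METADATA_POOL):
        subprocess.run(['rados', '-p', pool, 'import', '-'], input=blob, check=True)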
+ self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + serialized = self.fs.radosmo(['export', '-']) + + def is_ignored(obj_id, dentry=None): + """ + A filter to avoid redundantly mutating many similar objects (e.g. + stray dirfrags) or similar dentries (e.g. stray dir dentries) + """ + if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000": + return True + + if dentry and obj_id == "100.00000000": + if re.match("stray.+_head", dentry) and dentry != "stray0_head": + return True + + return False + + def get_path(obj_id, dentry=None): + """ + What filesystem path does this object or dentry correspond to? i.e. + what should I poke to see EIO after damaging it? + """ + + if obj_id == "1.00000000" and dentry == "subdir_head": + return "./subdir" + elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head": + return "./subdir/sixmegs" + + # None means ls will do an "ls -R" in hope of seeing some errors + return None + + objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n") + objects = [o for o in objects if not is_ignored(o)] + + # Find all objects with an OMAP header + omap_header_objs = [] + for o in objects: + header = self.fs.radosmo(["getomapheader", o], stdout=StringIO()) + # The rados CLI wraps the header output in a hex-printed style + header_bytes = int(re.match("header \((.+) bytes\)", header).group(1)) + if header_bytes > 0: + omap_header_objs.append(o) + + # Find all OMAP key/vals + omap_keys = [] + for o in objects: + keys_str = self.fs.radosmo(["listomapkeys", o], stdout=StringIO()) + if keys_str: + for key in keys_str.strip().split("\n"): + if not is_ignored(o, key): + omap_keys.append((o, key)) + + # Find objects that have data in their bodies + data_objects = [] + for obj_id in objects: + stat_out = self.fs.radosmo(["stat", obj_id], stdout=StringIO()) + size = int(re.match(".+, size (.+)$", stat_out).group(1)) + if size > 0: + data_objects.append(obj_id) + + # Define the various forms of damage we will inflict + class MetadataMutation(object): + def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None): + self.obj_id = obj_id_ + self.desc = desc_ + self.mutate_fn = mutate_fn_ + self.expectation = expectation_ + if ls_path is None: + self.ls_path = "." 
+ else: + self.ls_path = ls_path + + def __eq__(self, other): + return self.desc == other.desc + + def __hash__(self): + return hash(self.desc) + + junk = "deadbeef" * 10 + mutations = [] + + # Removals + for o in objects: + if o in [ + # JournalPointers are auto-replaced if missing (same path as upgrade) + "400.00000000", + # Missing dirfrags for non-system dirs result in empty directory + "10000000000.00000000", + # PurgeQueue is auto-created if not found on startup + "500.00000000", + # open file table is auto-created if not found on startup + "mds0_openfiles.0" + ]: + expectation = NO_DAMAGE + else: + expectation = DAMAGED_ON_START + + log.info("Expectation on rm '{0}' will be '{1}'".format( + o, expectation + )) + + mutations.append(MetadataMutation( + o, + "Delete {0}".format(o), + lambda o=o: self.fs.radosm(["rm", o]), + expectation + )) + + # Blatant corruptions + for obj_id in data_objects: + if obj_id == "500.00000000": + # purge queue corruption results in read-only FS + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)), + READONLY + )) + else: + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)), + DAMAGED_ON_START + )) + + # Truncations + for o in data_objects: + if o == "500.00000000": + # The PurgeQueue is allowed to be empty: Journaler interprets + # an empty header object as an empty journal. + expectation = NO_DAMAGE + else: + expectation = DAMAGED_ON_START + + mutations.append( + MetadataMutation( + o, + "Truncate {0}".format(o), + lambda o=o: self.fs.radosm(["truncate", o, "0"]), + expectation + )) + + # OMAP value corruptions + for o, k in omap_keys: + if o.startswith("100."): + # Anything in rank 0's 'mydir' + expectation = DAMAGED_ON_START + else: + expectation = EIO_ON_LS + + mutations.append( + MetadataMutation( + o, + "Corrupt omap key {0}:{1}".format(o, k), + lambda o=o,k=k: self.fs.radosm(["setomapval", o, k, junk]), + expectation, + get_path(o, k) + ) + ) + + # OMAP header corruptions + for o in omap_header_objs: + if re.match("60.\.00000000", o) \ + or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]: + expectation = DAMAGED_ON_START + else: + expectation = NO_DAMAGE + + log.info("Expectation on corrupt header '{0}' will be '{1}'".format( + o, expectation + )) + + mutations.append( + MetadataMutation( + o, + "Corrupt omap header on {0}".format(o), + lambda o=o: self.fs.radosm(["setomapheader", o, junk]), + expectation + ) + ) + + results = {} + + for mutation in mutations: + log.info("Applying mutation '{0}'".format(mutation.desc)) + + # Reset MDS state + self.mount_a.umount_wait(force=True) + self.fs.fail() + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + + # Reset RADOS pool state + self.fs.radosm(['import', '-'], stdin=BytesIO(serialized)) + + # Inject the mutation + mutation.mutate_fn() + + # Try starting the MDS + self.fs.set_joinable() + + # How long we'll wait between starting a daemon and expecting + # it to make it through startup, and potentially declare itself + # damaged to the mon cluster. 
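The mutation lambdas above bind the loop variable through a default argument (lambda o=o: ...); without that, every closure would see only the last object of the loop. A standalone illustration of the difference:

    objs = ['a', 'b', 'c']

    late = [lambda: o for o in objs]        # every call returns 'c'
    bound = [lambda o=o: o for o in objs]   # each call returns its own object

    assert [f() for f in late] == ['c', 'c', 'c']
    assert [f() for f in bound] == ['a', 'b', 'c']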
+ startup_timeout = 60 + + if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE): + if mutation.expectation == DAMAGED_ON_START: + # The MDS may pass through active before making it to damaged + try: + self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout) + except RuntimeError: + pass + + # Wait for MDS to either come up or go into damaged state + try: + self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout) + except RuntimeError: + crashed = False + # Didn't make it to healthy or damaged, did it crash? + for daemon_id, daemon in self.fs.mds_daemons.items(): + if daemon.proc and daemon.proc.finished: + crashed = True + log.error("Daemon {0} crashed!".format(daemon_id)) + daemon.proc = None # So that subsequent stop() doesn't raise error + if not crashed: + # Didn't go healthy, didn't go damaged, didn't crash, so what? + raise + else: + log.info("Result: Mutation '{0}' led to crash".format(mutation.desc)) + results[mutation] = CRASHED + continue + if self.is_marked_damaged(0): + log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc)) + results[mutation] = DAMAGED_ON_START + continue + else: + log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc)) + else: + try: + self.wait_until_true(self.fs.are_daemons_healthy, 60) + except RuntimeError: + log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc)) + if self.is_marked_damaged(0): + results[mutation] = DAMAGED_ON_START + else: + results[mutation] = FAILED_SERVER + continue + log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc)) + + # MDS is up, should go damaged on ls or client mount + self.mount_a.mount_wait() + if mutation.ls_path == ".": + proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False) + else: + proc = self.mount_a.stat(mutation.ls_path, wait=False) + + if mutation.expectation == DAMAGED_ON_LS: + try: + self.wait_until_true(lambda: self.is_marked_damaged(0), 60) + log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc)) + results[mutation] = DAMAGED_ON_LS + except RuntimeError: + if self.fs.are_daemons_healthy(): + log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format( + mutation.desc)) + results[mutation] = NO_DAMAGE + else: + log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc)) + results[mutation] = FAILED_SERVER + elif mutation.expectation == READONLY: + proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False) + try: + proc.wait() + except CommandFailedError: + stderr = proc.stderr.getvalue() + log.info(stderr) + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise + else: + try: + wait([proc], 20) + log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc)) + results[mutation] = NO_DAMAGE + except MaxWhileTries: + log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc)) + results[mutation] = FAILED_CLIENT + except CommandFailedError as e: + if e.exitstatus == errno.EIO: + log.info("Result: EIO on client") + results[mutation] = EIO_ON_LS + else: + log.info("Result: unexpected error {0} on client".format(e)) + results[mutation] = FAILED_CLIENT + + if mutation.expectation == EIO_ON_LS: + # EIOs mean something handled by DamageTable: assert that it has + # been populated + damage = json.loads( +
self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) + if len(damage) == 0: + results[mutation] = EIO_NO_DAMAGE + + failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result] + if failures: + log.error("{0} mutations had unexpected outcomes:".format(len(failures))) + for mutation, result in failures: + log.error(" Expected '{0}' actually '{1}' from '{2}'".format( + mutation.expectation, result, mutation.desc + )) + raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures))) + else: + log.info("All {0} mutations had expected outcomes".format(len(mutations))) + + def test_damaged_dentry(self): + # Damage to dentries is interesting because it leaves the + # directory's `complete` flag in a subtle state where + # we have marked the dir complete in order that folks + # can access it, but in actual fact there is a dentry + # missing + self.mount_a.run_shell(["mkdir", "subdir/"]) + + self.mount_a.run_shell(["touch", "subdir/file_undamaged"]) + self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"]) + + subdir_ino = self.mount_a.path_to_ino("subdir") + + self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + # Corrupt a dentry + junk = "deadbeef" * 10 + dirfrag_obj = "{0:x}.00000000".format(subdir_ino) + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Start up and try to list it + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + dentries = self.mount_a.ls("subdir/") + + # The damaged guy should have disappeared + self.assertEqual(dentries, ["file_undamaged"]) + + # I should get ENOENT if I try to read it normally, because + # the dir is considered complete + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + raise AssertionError("Expected ENOENT") + + # The fact that there is damage should have been recorded + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + damage_id = damage[0]['id'] + + # If I try to create a dentry with the same name as the damaged guy + # then that should be forbidden + try: + self.mount_a.touch("subdir/file_to_be_damaged") + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EIO) + else: + raise AssertionError("Expected EIO") + + # Attempting that touch will clear the client's complete flag; now + # when I stat it I'll get EIO instead of ENOENT + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + if isinstance(self.mount_a, FuseMount): + self.assertEqual(e.exitstatus, errno.EIO) + else: + # Old kernel client handles this case differently + self.assertIn(e.exitstatus, [errno.ENOENT, errno.EIO]) + else: + raise AssertionError("Expected EIO") + + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "2") + + self.mount_a.umount_wait() + + # Now repair the stats + scrub_json = self.fs.run_scrub(["start", "/subdir", "repair"]) + log.info(json.dumps(scrub_json, indent=2)) + + self.assertNotEqual(scrub_json, None) + self.assertEqual(scrub_json["return_code"], 0) +
self.assertEqual(self.fs.wait_until_scrub_complete(tag=scrub_json["scrub_tag"]), True) + + # Check that the file count is now correct + self.mount_a.mount_wait() + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "1") + + # Clean up the omap object + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Clean up the damagetable entry + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "rm", "{did}".format(did=damage_id)) + + # Now I should be able to create a file with the same name as the + # damaged guy if I want. + self.mount_a.touch("subdir/file_to_be_damaged") + + def test_open_ino_errors(self): + """ + That errors encountered during opening inos are properly propagated + """ + + self.mount_a.run_shell(["mkdir", "dir1"]) + self.mount_a.run_shell(["touch", "dir1/file1"]) + self.mount_a.run_shell(["mkdir", "dir2"]) + self.mount_a.run_shell(["touch", "dir2/file2"]) + self.mount_a.run_shell(["mkdir", "testdir"]) + self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"]) + self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"]) + + file1_ino = self.mount_a.path_to_ino("dir1/file1") + file2_ino = self.mount_a.path_to_ino("dir2/file2") + dir2_ino = self.mount_a.path_to_ino("dir2") + + # Ensure everything is written to backing store + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + # Drop everything from the MDS cache + self.fs.fail() + self.fs.journal_tool(['journal', 'reset'], 0) + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # Case 1: un-decodable backtrace + + # Validate that the backtrace is present and decodable + self.fs.read_backtrace(file1_ino) + # Go corrupt the backtrace of dir1/file1 (used for resolving + # testdir/hardlink1).
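+ # (The backtrace is stored in the "parent" xattr of the inode's first data-pool object, which is what _write_data_xattr overwrites with junk here.)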
+ self.fs._write_data_xattr(file1_ino, "parent", "rhubarb") + + # Check that touching the hardlink gives EIO + ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False) + try: + ran.wait() + except CommandFailedError: + self.assertTrue("Input/output error" in ran.stderr.getvalue()) + + # Check that an entry is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertEqual(damage[0]['damage_type'], "backtrace") + self.assertEqual(damage[0]['ino'], file1_ino) + + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "rm", str(damage[0]['id'])) + + + # Case 2: missing dirfrag for the target inode + + self.fs.radosm(["rm", "{0:x}.00000000".format(dir2_ino)]) + + # Check that touching the hardlink gives EIO + ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False) + try: + ran.wait() + except CommandFailedError: + self.assertTrue("Input/output error" in ran.stderr.getvalue()) + + # Check that an entry is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 2) + if damage[0]['damage_type'] == "backtrace" : + self.assertEqual(damage[0]['ino'], file2_ino) + self.assertEqual(damage[1]['damage_type'], "dir_frag") + self.assertEqual(damage[1]['ino'], dir2_ino) + else: + self.assertEqual(damage[0]['damage_type'], "dir_frag") + self.assertEqual(damage[0]['ino'], dir2_ino) + self.assertEqual(damage[1]['damage_type'], "backtrace") + self.assertEqual(damage[1]['ino'], file2_ino) + + for entry in damage: + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "rm", str(entry['id'])) + + def test_dentry_first_existing(self): + """ + That the MDS won't abort when the dentry is already known to be damaged. + """ + + def verify_corrupt(): + info = self.fs.read_cache("/a", 0) + log.debug('%s', info) + self.assertEqual(len(info), 1) + dirfrags = info[0]['dirfrags'] + self.assertEqual(len(dirfrags), 1) + dentries = dirfrags[0]['dentries'] + self.assertEqual([dn['path'] for dn in dentries if dn['is_primary']], ['a/c']) + self.assertEqual(dentries[0]['snap_first'], 18446744073709551606) # SNAP_HEAD + + self.mount_a.run_shell_payload("mkdir -p a/b") + self.fs.flush() + self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False) + self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + self.mount_a.run_shell_payload("mv a/b a/c; sync .") + self.mount_a.umount() + verify_corrupt() + self.fs.fail() + self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first") + self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False) + self.fs.set_joinable() + status = self.fs.status() + self.fs.flush() + self.assertFalse(self.fs.status().hadfailover(status)) + verify_corrupt() + + def test_dentry_first_preflush(self): + """ + That the MDS won't write a dentry with new damage to CDentry::first + to the journal. 
+ """ + + rank0 = self.fs.get_rank() + self.fs.rank_freeze(True, rank=0) + self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d") + self.fs.flush() + self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + with self.assert_cluster_log("MDS abort because newly corrupt dentry"): + p = self.mount_a.run_shell_payload("timeout 60 mv a/b a/z", wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout) + self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first") + self.fs.rank_freeze(False, rank=0) + self.delete_mds_coredump(rank0['name']) + self.fs.mds_restart(rank0['name']) + self.fs.wait_for_daemons() + p.wait() + self.mount_a.run_shell_payload("stat a/ && find a/") + self.fs.flush() + + def test_dentry_first_precommit(self): + """ + That the MDS won't write a dentry with new damage to CDentry::first + to the directory object. + """ + + fscid = self.fs.id + self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d; sync .") + self.mount_a.umount() # allow immediate scatter write back + self.fs.flush() + # now just twiddle some inode metadata on a regular file + self.mount_a.mount_wait() + self.mount_a.run_shell_payload("chmod 711 a/b/d; sync .") + self.mount_a.umount() # avoid journaling session related things + # okay, now cause the dentry to get damaged after loading from the journal + self.fs.fail() + self.config_set("mds", "mds_inject_journal_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + self.fs.set_joinable() + self.fs.wait_for_daemons() + rank0 = self.fs.get_rank() + self.fs.rank_freeze(True, rank=0) + # so now we want to trigger commit but this will crash, so: + with self.assert_cluster_log("MDS abort because newly corrupt dentry"): + c = ['--connect-timeout=60', 'tell', f"mds.{fscid}:0", "flush", "journal"] + p = self.ceph_cluster.mon_manager.run_cluster_cmd(args=c, wait=False, timeoutcmd=30) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout) + self.config_rm("mds", "mds_inject_journal_corrupt_dentry_first") + self.fs.rank_freeze(False, rank=0) + self.delete_mds_coredump(rank0['name']) + self.fs.mds_restart(rank0['name']) + self.fs.wait_for_daemons() + try: + p.wait() + except CommandFailedError as e: + print(e) + else: + self.fail("flush journal should fail!") + self.mount_a.mount_wait() + self.mount_a.run_shell_payload("stat a/ && find a/") + self.fs.flush() diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py new file mode 100644 index 000000000..9a93bd622 --- /dev/null +++ b/qa/tasks/cephfs/test_data_scan.py @@ -0,0 +1,796 @@ + +""" +Test our tools for recovering metadata from the data pool +""" +import json + +import logging +import os +import time +import traceback +import stat + +from io import BytesIO, StringIO +from collections import namedtuple, defaultdict +from textwrap import dedent + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class Workload(object): + def __init__(self, filesystem, mount): + self._mount = mount + self._filesystem = filesystem + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. 
Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def assert_equal(self, a, b): + try: + if a != b: + raise AssertionError("{0} != {1}".format(a, b)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def assert_not_equal(self, a, b): + try: + if a == b: + raise AssertionError("{0} == {1}".format(a, b)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def assert_true(self, a): + try: + if not a: + raise AssertionError("{0} is not true".format(a)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self): + """ + Damage the filesystem pools in ways that will be interesting to recover from. By + default just wipe everything in the metadata pool + """ + # Delete every object in the metadata pool + pool = self._filesystem.get_metadata_pool_name() + self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self): + """ + Called after client unmount, after write: flush whatever you want + """ + self._filesystem.mds_asok(["flush", "journal"]) + + def scrub(self): + """ + Called as a final step post recovery before verification. Right now, this + doesn't bother if errors are found in scrub - just that the MDS doesn't + crash and burn during scrub. 
+ """ + out_json = self._filesystem.run_scrub(["start", "/", "repair,recursive"]) + self.assert_not_equal(out_json, None) + self.assert_equal(out_json["return_code"], 0) + self.assert_equal(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + +class SimpleWorkload(Workload): + """ + Single file, single directory, check that it gets recovered and so does its size + """ + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = self._mount.stat("subdir/sixmegs") + + def validate(self): + self._mount.run_shell(["sudo", "ls", "subdir"], omit_sudo=False) + st = self._mount.stat("subdir/sixmegs", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + return self._errors + + +class SymlinkWorkload(Workload): + """ + Symlink file, check that it gets recovered as symlink + """ + def write(self): + self._mount.run_shell(["mkdir", "symdir"]) + self._mount.write_n_mb("symdir/onemegs", 1) + self._mount.run_shell(["ln", "-s", "onemegs", "symdir/symlink_onemegs"]) + self._mount.run_shell(["ln", "-s", "symdir/onemegs", "symlink1_onemegs"]) + + def validate(self): + self._mount.run_shell(["sudo", "ls", "symdir"], omit_sudo=False) + st = self._mount.lstat("symdir/symlink_onemegs") + self.assert_true(stat.S_ISLNK(st['st_mode'])) + target = self._mount.readlink("symdir/symlink_onemegs") + self.assert_equal(target, "onemegs") + + st = self._mount.lstat("symlink1_onemegs") + self.assert_true(stat.S_ISLNK(st['st_mode'])) + target = self._mount.readlink("symlink1_onemegs") + self.assert_equal(target, "symdir/onemegs") + return self._errors + + +class MovedFile(Workload): + def write(self): + # Create a file whose backtrace disagrees with his eventual position + # in the metadata. We will see that he gets reconstructed in his + # original position according to his backtrace. 
+ self._mount.run_shell(["mkdir", "subdir_alpha"]) + self._mount.run_shell(["mkdir", "subdir_bravo"]) + self._mount.write_n_mb("subdir_alpha/sixmegs", 6) + self._filesystem.mds_asok(["flush", "journal"]) + self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"]) + self._initial_state = self._mount.stat("subdir_bravo/sixmegs") + + def flush(self): + pass + + def validate(self): + self.assert_equal(self._mount.ls(sudo=True), ["subdir_alpha"]) + st = self._mount.stat("subdir_alpha/sixmegs", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + return self._errors + + +class BacktracelessFile(Workload): + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = self._mount.stat("subdir/sixmegs") + + def flush(self): + # Never flush metadata, so backtrace won't be written + pass + + def validate(self): + ino_name = "%x" % self._initial_state["st_ino"] + + # The inode should be linked into lost+found because we had no path for it + self.assert_equal(self._mount.ls(sudo=True), ["lost+found"]) + self.assert_equal(self._mount.ls("lost+found", sudo=True), [ino_name]) + st = self._mount.stat(f"lost+found/{ino_name}", sudo=True) + + # We might not have got the name or path, but we should still get the size + self.assert_equal(st['st_size'], self._initial_state['st_size']) + + # remove the entry from lost+found directory + self._mount.run_shell(["sudo", "rm", "-f", f'lost+found/{ino_name}'], omit_sudo=False) + self.assert_equal(self._mount.ls("lost+found", sudo=True), []) + + return self._errors + + +class StripedStashedLayout(Workload): + def __init__(self, fs, m, pool=None): + super(StripedStashedLayout, self).__init__(fs, m) + + # Nice small stripes so we can quickly do our writes+validates + self.sc = 4 + self.ss = 65536 + self.os = 262144 + self.pool = pool and pool or self._filesystem.get_data_pool_name() + + self.interesting_sizes = [ + # Exactly stripe_count objects will exist + self.os * self.sc, + # Fewer than stripe_count objects will exist + self.os * self.sc // 2, + self.os * (self.sc - 1) + self.os // 2, + self.os * (self.sc - 1) + self.os // 2 - 1, + self.os * (self.sc + 1) + self.os // 2, + self.os * (self.sc + 1) + self.os // 2 + 1, + # More than stripe_count objects will exist + self.os * self.sc + self.os * self.sc // 2 + ] + + def write(self): + # Create a dir with a striped layout set on it + self._mount.run_shell(["mkdir", "stripey"]) + + self._mount.setfattr("./stripey", "ceph.dir.layout", + "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format( + ss=self.ss, os=self.os, sc=self.sc, pool=self.pool + )) + + # Write files, then flush metadata so that its layout gets written into an xattr + for i, n_bytes in enumerate(self.interesting_sizes): + self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + # This is really just validating the validator + self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + self._filesystem.mds_asok(["flush", "journal"]) + + # Write another file in the same way, but this time don't flush the metadata, + # so that it won't have the layout xattr + self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512) + self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512) + + self._initial_state = { + "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file") + } + + def flush(self): + # Pass because we already selectively flushed during write 
+ pass + + def validate(self): + # The first files should have been recovered into its original location + # with the correct layout: read back correct data + for i, n_bytes in enumerate(self.interesting_sizes): + try: + self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + except CommandFailedError as e: + self._errors.append( + ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3)) + ) + + # The unflushed file should have been recovered into lost+found without + # the correct layout: read back junk + ino_name = "%x" % self._initial_state["unflushed_ino"] + self.assert_equal(self._mount.ls("lost+found", sudo=True), [ino_name]) + try: + self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512) + except CommandFailedError: + pass + else: + self._errors.append( + ValidationError("Unexpectedly valid data in unflushed striped file", "") + ) + + return self._errors + + +class ManyFilesWorkload(Workload): + def __init__(self, filesystem, mount, file_count): + super(ManyFilesWorkload, self).__init__(filesystem, mount) + self.file_count = file_count + + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + for n in range(0, self.file_count): + self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024) + + def validate(self): + for n in range(0, self.file_count): + try: + self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024) + except CommandFailedError as e: + self._errors.append( + ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3)) + ) + + return self._errors + + +class MovedDir(Workload): + def write(self): + # Create a nested dir that we will then move. Two files with two different + # backtraces referring to the moved dir, claiming two different locations for + # it. We will see that only one backtrace wins and the dir ends up with + # single linkage. 
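+ # (validate() below accepts either grandparent as the winner; it only requires that exactly one of the two claimed locations survives recovery.)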
+ self._mount.run_shell(["mkdir", "-p", "grandmother/parent"]) + self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1) + self._filesystem.mds_asok(["flush", "journal"]) + self._mount.run_shell(["mkdir", "grandfather"]) + self._mount.run_shell(["mv", "grandmother/parent", "grandfather"]) + self._mount.write_n_mb("grandfather/parent/new_pos_file", 2) + self._filesystem.mds_asok(["flush", "journal"]) + + self._initial_state = ( + self._mount.stat("grandfather/parent/orig_pos_file"), + self._mount.stat("grandfather/parent/new_pos_file") + ) + + def validate(self): + root_files = self._mount.ls() + self.assert_equal(len(root_files), 1) + self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True) + winner = root_files[0] + st_opf = self._mount.stat(f"{winner}/parent/orig_pos_file", sudo=True) + st_npf = self._mount.stat(f"{winner}/parent/new_pos_file", sudo=True) + + self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size']) + self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size']) + + +class MissingZerothObject(Workload): + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = self._mount.stat("subdir/sixmegs") + + def damage(self): + super(MissingZerothObject, self).damage() + zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino']) + self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name()) + + def validate(self): + ino = self._initial_state['st_ino'] + st = self._mount.stat(f"lost+found/{ino:x}", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + + +class NonDefaultLayout(Workload): + """ + Check that the reconstruction copes with files that have a different + object size in their layout + """ + def write(self): + self._mount.run_shell(["touch", "datafile"]) + self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608") + self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"]) + self._initial_state = self._mount.stat("datafile") + + def validate(self): + # Check we got the layout reconstructed properly + object_size = int(self._mount.getfattr("./datafile", "ceph.file.layout.object_size", sudo=True)) + self.assert_equal(object_size, 8388608) + + # Check we got the file size reconstructed properly + st = self._mount.stat("datafile", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + + +class TestDataScan(CephFSTestCase): + MDSS_REQUIRED = 2 + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + def _rebuild_metadata(self, workload, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. 
+ """ + + # First, inject some files + + workload.write() + + # Unmount the client and flush the journal: the tool should also cope with + # situations where there is dirty metadata, but we'll test that separately + self.mount_a.umount_wait() + workload.flush() + + # Stop the MDS + self.fs.fail() + + # After recovery, we need the MDS to not be strict about stats (in production these options + # are off by default, but in QA we need to explicitly disable them) + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + + # Apply any data damage the workload wants + workload.damage() + + # Reset the MDS map in case multiple ranks were in play: recovery procedure + # only understands how to rebuild metadata under rank 0 + self.fs.reset() + + self.fs.set_joinable() # redundant with reset + + def get_state(mds_id): + info = self.mds_cluster.get_mds_info(mds_id) + return info['state'] if info is not None else None + + self.wait_until_true(lambda: self.is_marked_damaged(0), 60) + for mds_id in self.fs.mds_ids: + self.wait_until_equal( + lambda: get_state(mds_id), + "up:standby", + timeout=60) + + self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) + + # Run the recovery procedure + if False: + with self.assertRaises(CommandFailedError): + # Normal reset should fail when no objects are present, we'll use --force instead + self.fs.journal_tool(["journal", "reset"], 0) + + self.fs.journal_tool(["journal", "reset", "--force"], 0) + self.fs.data_scan(["init"]) + self.fs.data_scan(["scan_extents"], worker_count=workers) + self.fs.data_scan(["scan_inodes"], worker_count=workers) + self.fs.data_scan(["scan_links"]) + + # Mark the MDS repaired + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + + # Start the MDS + self.fs.mds_restart() + self.fs.wait_for_daemons() + log.info(str(self.mds_cluster.status())) + + # Mount a client + self.mount_a.mount_wait() + + # run scrub as it is recommended post recovery for most + # (if not all) recovery mechanisms. 
+ workload.scrub() + + # See that the files are present and correct + errors = workload.validate() + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def test_rebuild_simple(self): + self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a)) + + def test_rebuild_symlink(self): + self._rebuild_metadata(SymlinkWorkload(self.fs, self.mount_a)) + + def test_rebuild_moved_file(self): + self._rebuild_metadata(MovedFile(self.fs, self.mount_a)) + + def test_rebuild_backtraceless(self): + self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a)) + + def test_rebuild_moved_dir(self): + self._rebuild_metadata(MovedDir(self.fs, self.mount_a)) + + def test_rebuild_missing_zeroth(self): + self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a)) + + def test_rebuild_nondefault_layout(self): + self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a)) + + def test_stashed_layout(self): + self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a)) + + def _dirfrag_keys(self, object_id): + keys_str = self.fs.radosmo(["listomapkeys", object_id], stdout=StringIO()) + if keys_str: + return keys_str.strip().split("\n") + else: + return [] + + def test_fragmented_injection(self): + """ + That when injecting a dentry into a fragmented directory, we put it in the right fragment. + """ + + file_count = 100 + file_names = ["%s" % n for n in range(0, file_count)] + + # Make sure and disable dirfrag auto merging and splitting + self.fs.set_ceph_conf('mds', 'mds bal merge size', 0) + self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count) + + # Create a directory of `file_count` files, each named after its + # decimal number and containing the string of its decimal number + self.mount_a.run_python(dedent(""" + import os + path = os.path.join("{path}", "subdir") + os.mkdir(path) + for n in range(0, {file_count}): + open(os.path.join(path, "%s" % n), 'w').write("%s" % n) + """.format( + path=self.mount_a.mountpoint, + file_count=file_count + ))) + + dir_ino = self.mount_a.path_to_ino("subdir") + + # Only one MDS should be active! 
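+ # (the dirfrag split below is sent over the admin socket to that single active MDS, addressed by name)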
+ self.assertEqual(len(self.fs.get_active_names()), 1) + + # Ensure that one directory is fragmented + mds_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id) + + # Flush journal and stop MDS + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"], mds_id) + self.fs.fail() + + # Pick a dentry and wipe out its key + # Because I did a 1 bit split, I know one frag will be named <inode>.01000000 + frag_obj_id = "{0:x}.01000000".format(dir_ino) + keys = self._dirfrag_keys(frag_obj_id) + victim_key = keys[7] # arbitrary choice + log.info("victim_key={0}".format(victim_key)) + victim_dentry = victim_key.split("_head")[0] + self.fs.radosm(["rmomapkey", frag_obj_id, victim_key]) + + # Start filesystem back up, observe that the file appears to be gone in an `ls` + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n") + self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry])))) + + # Stop the filesystem + self.mount_a.umount_wait() + self.fs.fail() + + # Run data-scan, observe that it inserts our dentry back into the correct fragment + # by checking the omap now has the dentry's key again + self.fs.data_scan(["scan_extents"]) + self.fs.data_scan(["scan_inodes"]) + self.fs.data_scan(["scan_links"]) + self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id)) + + # Start the filesystem and check that the dentry we deleted is now once again visible + # and points to the correct file data. + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + self.mount_a.run_shell(["ls", "-l", "subdir/"]) # debugging + # Use sudo because cephfs-data-scan will reinsert the dentry with root ownership, it can't know the real owner. 
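+ # (each file was created containing its own name, so reading the name back proves the reinserted dentry points at the original data object)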
+ out = self.mount_a.run_shell_payload(f"sudo cat subdir/{victim_dentry}", omit_sudo=False).stdout.getvalue().strip() + self.assertEqual(out, victim_dentry) + + # Finally, close the loop by checking our injected dentry survives a merge + mds_id = self.fs.get_active_names()[0] + self.mount_a.ls("subdir") # Do an ls to ensure both frags are in cache so the merge will work + self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id) + self.fs.mds_asok(["flush", "journal"], mds_id) + frag_obj_id = "{0:x}.00000000".format(dir_ino) + keys = self._dirfrag_keys(frag_obj_id) + self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names])) + + # run scrub to update and make sure rstat.rbytes info in subdir inode and dirfrag + # are matched + out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Remove the whole 'sudbdir' directory + self.mount_a.run_shell(["rm", "-rf", "subdir/"]) + + @for_teuthology + def test_parallel_execution(self): + self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7) + + def test_pg_files(self): + """ + That the pg files command tells us which files are associated with + a particular PG + """ + file_count = 20 + self.mount_a.run_shell(["mkdir", "mydir"]) + self.mount_a.create_n_files("mydir/myfile", file_count) + + # Some files elsewhere in the system that we will ignore + # to check that the tool is filtering properly + self.mount_a.run_shell(["mkdir", "otherdir"]) + self.mount_a.create_n_files("otherdir/otherfile", file_count) + + pgs_to_files = defaultdict(list) + # Rough (slow) reimplementation of the logic + for i in range(0, file_count): + file_path = "mydir/myfile_{0}".format(i) + ino = self.mount_a.path_to_ino(file_path) + obj = "{0:x}.{1:08x}".format(ino, 0) + pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "osd", "map", self.fs.get_data_pool_name(), obj, + "--format=json-pretty" + ))['pgid'] + pgs_to_files[pgid].append(file_path) + log.info("{0}: {1}".format(file_path, pgid)) + + pg_count = self.fs.get_pool_pg_num(self.fs.get_data_pool_name()) + for pg_n in range(0, pg_count): + pg_str = "{0}.{1:x}".format(self.fs.get_data_pool_id(), pg_n) + out = self.fs.data_scan(["pg_files", "mydir", pg_str]) + lines = [l for l in out.split("\n") if l] + log.info("{0}: {1}".format(pg_str, lines)) + self.assertSetEqual(set(lines), set(pgs_to_files[pg_str])) + + def test_rebuild_linkage(self): + """ + The scan_links command fixes linkage errors + """ + self.mount_a.run_shell(["mkdir", "testdir1"]) + self.mount_a.run_shell(["mkdir", "testdir2"]) + dir1_ino = self.mount_a.path_to_ino("testdir1") + dir2_ino = self.mount_a.path_to_ino("testdir2") + dirfrag1_oid = "{0:x}.00000000".format(dir1_ino) + dirfrag2_oid = "{0:x}.00000000".format(dir2_ino) + + self.mount_a.run_shell(["touch", "testdir1/file1"]) + self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"]) + self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"]) + + mds_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["flush", "journal"], mds_id) + + dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid) + + # introduce duplicated primary link + file1_key = "file1_head" + self.assertIn(file1_key, dirfrag1_keys) + file1_omap_data = self.fs.radosmo(["getomapval", dirfrag1_oid, file1_key, '-']) + self.fs.radosm(["setomapval", dirfrag2_oid, file1_key], 
stdin=BytesIO(file1_omap_data)) + self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid)) + + # remove a remote link, make inode link count incorrect + link1_key = 'link1_head' + self.assertIn(link1_key, dirfrag1_keys) + self.fs.radosm(["rmomapkey", dirfrag1_oid, link1_key]) + + # increase good primary link's version + self.mount_a.run_shell(["touch", "testdir1/file1"]) + self.mount_a.umount_wait() + + self.fs.mds_asok(["flush", "journal"], mds_id) + self.fs.fail() + + # repair linkage errors + self.fs.data_scan(["scan_links"]) + + # primary link in testdir2 was deleted? + self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid)) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # link count was adjusted? + file1_nlink = self.mount_a.path_to_nlink("testdir1/file1") + self.assertEqual(file1_nlink, 2) + + out_json = self.fs.run_scrub(["start", "/testdir1", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def test_rebuild_inotable(self): + """ + The scan_links command repair inotables + """ + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + + active_mds_names = self.fs.get_active_names() + mds0_id = active_mds_names[0] + mds1_id = active_mds_names[1] + + self.mount_a.run_shell(["mkdir", "dir1"]) + dir_ino = self.mount_a.path_to_ino("dir1") + self.mount_a.setfattr("dir1", "ceph.dir.pin", "1") + # wait for subtree migration + + file_ino = 0; + while True: + time.sleep(1) + # allocate an inode from mds.1 + self.mount_a.run_shell(["touch", "dir1/file1"]) + file_ino = self.mount_a.path_to_ino("dir1/file1") + if file_ino >= (2 << 40): + break + self.mount_a.run_shell(["rm", "-f", "dir1/file1"]) + + self.mount_a.umount_wait() + + self.fs.mds_asok(["flush", "journal"], mds0_id) + self.fs.mds_asok(["flush", "journal"], mds1_id) + self.fs.fail() + + self.fs.radosm(["rm", "mds0_inotable"]) + self.fs.radosm(["rm", "mds1_inotable"]) + + self.fs.data_scan(["scan_links", "--filesystem", self.fs.name]) + + mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"])) + self.assertGreaterEqual( + mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino) + + mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"])) + self.assertGreaterEqual( + mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + out_json = self.fs.run_scrub(["start", "/dir1", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def test_rebuild_snaptable(self): + """ + The scan_links command repair snaptable + """ + self.fs.set_allow_new_snaps(True) + + self.mount_a.run_shell(["mkdir", "dir1"]) + self.mount_a.run_shell(["mkdir", "dir1/.snap/s1"]) + self.mount_a.run_shell(["mkdir", "dir1/.snap/s2"]) + self.mount_a.run_shell(["rmdir", "dir1/.snap/s2"]) + + self.mount_a.umount_wait() + + mds0_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["flush", "journal"], mds0_id) + + # wait for mds to update removed snaps + time.sleep(10) + + old_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"])) + # stamps may have minor difference + for item in old_snaptable['snapserver']['snaps']: + del item['stamp'] + + self.fs.radosm(["rm", 
"mds_snaptable"]) + self.fs.data_scan(["scan_links", "--filesystem", self.fs.name]) + + new_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"])) + for item in new_snaptable['snapserver']['snaps']: + del item['stamp'] + self.assertGreaterEqual( + new_snaptable['snapserver']['last_snap'], old_snaptable['snapserver']['last_snap']) + self.assertEqual( + new_snaptable['snapserver']['snaps'], old_snaptable['snapserver']['snaps']) + + out_json = self.fs.run_scrub(["start", "/dir1", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def _prepare_extra_data_pool(self, set_root_layout=True): + extra_data_pool_name = self.fs.get_data_pool_name() + '_extra' + self.fs.add_data_pool(extra_data_pool_name) + if set_root_layout: + self.mount_a.setfattr(".", "ceph.dir.layout.pool", + extra_data_pool_name) + return extra_data_pool_name + + def test_extra_data_pool_rebuild_simple(self): + self._prepare_extra_data_pool() + self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a)) + + def test_extra_data_pool_rebuild_few_files(self): + self._prepare_extra_data_pool() + self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 5), workers=1) + + @for_teuthology + def test_extra_data_pool_rebuild_many_files_many_workers(self): + self._prepare_extra_data_pool() + self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7) + + def test_extra_data_pool_stashed_layout(self): + pool_name = self._prepare_extra_data_pool(False) + self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a, pool_name)) diff --git a/qa/tasks/cephfs/test_dump_tree.py b/qa/tasks/cephfs/test_dump_tree.py new file mode 100644 index 000000000..48a2c6f00 --- /dev/null +++ b/qa/tasks/cephfs/test_dump_tree.py @@ -0,0 +1,66 @@ +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import random +import os + +class TestDumpTree(CephFSTestCase): + def get_paths_to_ino(self): + inos = {} + p = self.mount_a.run_shell(["find", "./"]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos[path] = self.mount_a.path_to_ino(path, False) + + return inos + + def populate(self): + self.mount_a.run_shell(["git", "clone", + "https://github.com/ceph/ceph-qa-suite"]) + + def test_basic(self): + self.mount_a.run_shell(["mkdir", "parent"]) + self.mount_a.run_shell(["mkdir", "parent/child"]) + self.mount_a.run_shell(["touch", "parent/child/file"]) + self.mount_a.run_shell(["mkdir", "parent/child/grandchild"]) + self.mount_a.run_shell(["touch", "parent/child/grandchild/file"]) + + inos = self.get_paths_to_ino() + tree = self.fs.mds_asok(["dump", "tree", "/parent/child", "1"]) + + target_inos = [inos["./parent/child"], inos["./parent/child/file"], + inos["./parent/child/grandchild"]] + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! + + assert(len(target_inos) == 0) + + def test_random(self): + random.seed(0) + + self.populate() + inos = self.get_paths_to_ino() + target = random.sample(inos.keys(), 1)[0] + + if target != "./": + target = os.path.dirname(target) + + subtree = [path for path in inos.keys() if path.startswith(target)] + target_inos = [inos[path] for path in subtree] + tree = self.fs.mds_asok(["dump", "tree", target[1:]]) + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! 
+ + assert(len(target_inos) == 0) + + target_depth = target.count('/') + maxdepth = max([path.count('/') for path in subtree]) - target_depth + depth = random.randint(0, maxdepth) + target_inos = [inos[path] for path in subtree \ + if path.count('/') <= depth + target_depth] + tree = self.fs.mds_asok(["dump", "tree", target[1:], str(depth)]) + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! + + assert(len(target_inos) == 0) diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py new file mode 100644 index 000000000..4b7e884ec --- /dev/null +++ b/qa/tasks/cephfs/test_exports.py @@ -0,0 +1,582 @@ +import logging +import random +import time +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestExports(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 2 + + def test_session_race(self): + """ + Test session creation race. + + See: https://tracker.ceph.com/issues/24072#change-113056 + """ + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + rank1 = self.fs.get_rank(rank=1, status=status) + + # Create a directory that is pre-exported to rank 1 + self.mount_a.run_shell(["mkdir", "-p", "a/aa"]) + self.mount_a.setfattr("a", "ceph.dir.pin", "1") + self._wait_subtrees([('/a', 1)], status=status, rank=1) + + # Now set the mds config to allow the race + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1) + + # Now create another directory and try to export it + self.mount_b.run_shell(["mkdir", "-p", "b/bb"]) + self.mount_b.setfattr("b", "ceph.dir.pin", "1") + + time.sleep(5) + + # Now turn off the race so that it doesn't wait again + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1) + + # Now try to create a session with rank 1 by accessing a dir known to + # be there; if buggy, this should cause rank 1 to crash: + self.mount_b.run_shell(["ls", "a"]) + + # Check if rank1 changed (standby took over?)
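+ # (an unchanged GID means the original rank 1 daemon survived, i.e. it did not crash and get replaced by a standby)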
+ new_rank1 = self.fs.get_rank(rank=1) + self.assertEqual(rank1['gid'], new_rank1['gid']) + +class TestExportPin(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p 1/2/3/4") + + def test_noop(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_negative(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-2341") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_empty_pin(self): + self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_trivial(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + + def test_export_targets(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.status = self.fs.status() + r0 = self.status.get_rank(self.fs.id, 0) + self.assertTrue(sorted(r0['export_targets']) == [1]) + + def test_redundant(self): + # redundant pin /1/2 to rank 1 + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1) + + def test_reassignment(self): + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0) + + def test_phantom_rank(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "10") + time.sleep(30) # wait for nothing weird to happen + self._wait_subtrees([('/1', 0)], status=self.status) + + def test_nested(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2") + self._wait_subtrees([('/1', 1), ('/1/2', 0), ('/1/2/3', 2)], status=self.status, rank=2) + + def test_nested_unset(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "2") + self._wait_subtrees([('/1', 1), ('/1/2', 2)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + + def test_rename(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.run_shell_payload("mkdir -p 9/8/7") + self.mount_a.setfattr("9/8", "ceph.dir.pin", "0") + self._wait_subtrees([('/1', 1), ("/9/8", 0)], status=self.status, rank=0) + self.mount_a.run_shell_payload("mv 9/8 1/2") + self._wait_subtrees([('/1', 1), ("/1/2/8", 0)], status=self.status, rank=0) + + def test_getfattr(self): + # pin /1 to rank 1 and /1/2 to rank 0 + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1) + + if not isinstance(self.mount_a, FuseMount): + p = self.mount_a.client_remote.sh('uname -r', wait=True) + dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin") + log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin) + if str(p)
< "5" and not(dir_pin): + self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin") + self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1') + self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0') + + def test_export_pin_cache_drop(self): + """ + That the export pin does not prevent empty (nothing in cache) subtree merging. + """ + + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status) + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + # drop cache multiple times to clear replica pins + self._wait_subtrees([], status=self.status, action=_drop) + + def test_open_file(self): + """ + Test opening a file via a hard link that is not in the same mds as the inode. + + See https://tracker.ceph.com/issues/58411 + """ + + self.mount_a.run_shell_payload("mkdir -p target link") + self.mount_a.touch("target/test.txt") + self.mount_a.run_shell_payload("ln target/test.txt link/test.txt") + self.mount_a.setfattr("target", "ceph.dir.pin", "0") + self.mount_a.setfattr("link", "ceph.dir.pin", "1") + self._wait_subtrees([("/target", 0), ("/link", 1)], status=self.status) + + # Release client cache, otherwise the bug may not be triggered even if buggy. + self.mount_a.remount() + + # Open the file with access mode(O_CREAT|O_WRONLY|O_TRUNC), + # this should cause the rank 1 to crash if buggy. + # It's OK to use 'truncate -s 0 link/test.txt' here, + # its access mode is (O_CREAT|O_WRONLY), it can also trigger this bug. + log.info("test open mode (O_CREAT|O_WRONLY|O_TRUNC)") + proc = self.mount_a.open_for_writing("link/test.txt") + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + + # Test other write modes too. + if success: + self.mount_a.remount() + log.info("test open mode (O_WRONLY|O_TRUNC)") + proc = self.mount_a.open_for_writing("link/test.txt", creat=False) + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + if success: + self.mount_a.remount() + log.info("test open mode (O_CREAT|O_WRONLY)") + proc = self.mount_a.open_for_writing("link/test.txt", trunc=False) + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + + # Test open modes too. + if success: + self.mount_a.remount() + log.info("test open mode (O_RDONLY)") + proc = self.mount_a.open_for_reading("link/test.txt") + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + + if success: + # All tests done, rank 1 didn't crash. + return + + if not proc.finished: + log.warning("open operation is blocked, kill it") + proc.kill() + + if not self.fs.rank_is_running(rank=1): + log.warning("rank 1 crashed") + + self.mount_a.umount_wait(force=True) + + self.assertTrue(success, "open operation failed") + +class TestEphemeralPins(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.config_set('mds', 'mds_export_ephemeral_random', True) + self.config_set('mds', 'mds_export_ephemeral_distributed', True) + self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0) + + self.mount_a.run_shell_payload(""" +set -e + +# Use up a random number of inode numbers so the ephemeral pinning is not the same every test. 
+mkdir .inode_number_thrash +count=$((RANDOM % 1024)) +for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done +rm -rf .inode_number_thrash +""") + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True): + return self.mount_a.run_shell_payload(f""" +set -ex +mkdir -p {path} +{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""} +{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""} +{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""} +for ((i = 0; i < {count}; i++)); do + mkdir -p "{path}/$i" + echo file > "{path}/$i/file" +done +""", wait=wait) + + def test_ephemeral_pin_dist_override(self): + """ + That an ephemeral distributed pin overrides a normal export pin. + """ + + self._setup_tree(distributed=True) + subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertTrue(s['distributed_ephemeral_pin']) + + def test_ephemeral_pin_dist_override_pin(self): + """ + That an export pin overrides an ephemerally pinned directory. + """ + + self._setup_tree(distributed=True) + subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.mount_a.setfattr("tree", "ceph.dir.pin", "0") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertEqual(s['auth_first'], 0) + self.assertFalse(s['distributed_ephemeral_pin']) + # it has been merged into /tree + + def test_ephemeral_pin_dist_off(self): + """ + That turning off ephemeral distributed pin merges subtrees. + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertFalse(s['distributed_ephemeral_pin']) + + + def test_ephemeral_pin_dist_conf_off(self): + """ + That turning off ephemeral distributed pin config prevents distribution. + """ + + self._setup_tree() + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertFalse(s['distributed_ephemeral_pin']) + + def _test_ephemeral_pin_dist_conf_off_merge(self): + """ + That turning off ephemeral distributed pin config merges subtrees. + FIXME: who triggers the merge? + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status) + + def test_ephemeral_pin_dist_override_before(self): + """ + That a conventional export pin overrides the distributed policy _before_ distributed policy is set. 
+ """ + + count = 10 + self._setup_tree(count=count) + test = [] + for i in range(count): + path = f"tree/{i}" + self.mount_a.setfattr(path, "ceph.dir.pin", "1") + test.append(("/"+path, 1)) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(15) # for something to not happen... + self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_override_after(self): + """ + That a conventional export pin overrides the distributed policy _after_ distributed policy is set. + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + test = [] + for i in range(10): + path = f"tree/{i}" + self.mount_a.setfattr(path, "ceph.dir.pin", "1") + test.append(("/"+path, 1)) + self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_failover(self): + """ + That MDS failover does not cause unnecessary migrations. + """ + + # pin /tree so it does not export during failover + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + #test = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {before}") + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + time.sleep(10) # waiting for something to not happen + after = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {after}") + self.assertEqual(before, after) + + def test_ephemeral_pin_distribution(self): + """ + That ephemerally pinned subtrees are somewhat evenly distributed. + """ + + max_mds = 3 + frags = 128 + + self.fs.set_max_mds(max_mds) + self.status = self.fs.wait_for_daemons() + + self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds) + self._setup_tree(count=1000, distributed=True) + + subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all") + nsubtrees = len(subtrees) + + # Check if distribution is uniform + rank0 = list(filter(lambda x: x['auth_first'] == 0, subtrees)) + rank1 = list(filter(lambda x: x['auth_first'] == 1, subtrees)) + rank2 = list(filter(lambda x: x['auth_first'] == 2, subtrees)) + self.assertGreaterEqual(len(rank0)/nsubtrees, 0.15) + self.assertGreaterEqual(len(rank1)/nsubtrees, 0.15) + self.assertGreaterEqual(len(rank2)/nsubtrees, 0.15) + + + def test_ephemeral_random(self): + """ + That 100% randomness causes all children to be pinned. + """ + self._setup_tree(random=1.0) + self._wait_random_subtrees(100, status=self.status, rank="all") + + def test_ephemeral_random_max(self): + """ + That the config mds_export_ephemeral_random_max is not exceeded. + """ + + r = 0.5 + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + self._setup_tree(path="tree/new", count=count) + time.sleep(30) # for something not to happen... + subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/") + self.assertLessEqual(len(subtrees), int(.01*count*1.25)) + + def test_ephemeral_random_max_config(self): + """ + That the config mds_export_ephemeral_random_max config rejects new OOB policies. 
+ """ + + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + try: + p = self._setup_tree(count=1, random=0.02, wait=False) + p.wait() + except CommandFailedError as e: + log.info(f"{e}") + self.assertIn("Invalid", p.stderr.getvalue()) + else: + raise RuntimeError("mds_export_ephemeral_random_max ignored!") + + def test_ephemeral_random_dist(self): + """ + That ephemeral distributed pin overrides ephemeral random pin + """ + + self._setup_tree(random=1.0, distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status) + + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path.startswith('/tree'): + self.assertFalse(s['random_ephemeral_pin']) + + def test_ephemeral_random_pin_override_before(self): + """ + That a conventional export pin overrides the random policy before creating new directories. + """ + + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=10, export=1) + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_random_pin_override_after(self): + """ + That a conventional export pin overrides the random policy after creating new directories. + """ + + count = 10 + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=count) + self._wait_random_subtrees(count+1, status=self.status, rank="all") + self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1") + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_randomness(self): + """ + That the randomness is reasonable. + """ + + r = random.uniform(0.25, 0.75) # ratios don't work for small r! + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + time.sleep(30) # for max to not be exceeded + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + self.assertLessEqual(len(subtrees), int(r*count*1.50)) + + def test_ephemeral_random_cache_drop(self): + """ + That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging. + """ + + count = 100 + self._setup_tree(count=count, random=1.0) + self._wait_random_subtrees(count, status=self.status, rank="all") + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + self._wait_subtrees([], status=self.status, action=_drop) + + def test_ephemeral_random_failover(self): + """ + That the random ephemeral pins stay pinned across MDS failover. + """ + + count = 100 + r = 0.5 + self._setup_tree(count=count, random=r) + # wait for all random subtrees to be created, not a specific count + time.sleep(30) + subtrees = self._wait_random_subtrees(1, status=self.status, rank=1) + before = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before.sort(); + + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + + time.sleep(30) # waiting for something to not happen + subtrees = self._wait_random_subtrees(1, status=self.status, rank=1) + after = [(s['dir']['path'], s['auth_first']) for s in subtrees] + after.sort(); + log.info(f"subtrees before: {before}") + log.info(f"subtrees after: {after}") + + self.assertEqual(before, after) + + def test_ephemeral_pin_grow_mds(self): + """ + That consistent hashing works to reduce the number of migrations. 
+ """ + + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + + self._setup_tree(random=1.0) + subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all") + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + # Sleeping for a while to allow the ephemeral pin migrations to complete + time.sleep(30) + + subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break + + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # ~50% of subtrees from the two rank will migrate to another rank + self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget + + def test_ephemeral_pin_shrink_mds(self): + """ + That consistent hashing works to reduce the number of migrations. + """ + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self._setup_tree(random=1.0) + subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all") + + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + time.sleep(30) + + subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break + + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2 + self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py new file mode 100644 index 000000000..ddcc58ccc --- /dev/null +++ b/qa/tasks/cephfs/test_failover.py @@ -0,0 +1,819 @@ +import time +import signal +import logging +import operator +from random import randint, choice + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount + +log = logging.getLogger(__name__) + +class TestClusterAffinity(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 4 + + def _verify_join_fs(self, target, status=None, fs=None): + fs_select = fs + if fs_select is None: + fs_select = self.fs + if status is None: + status = fs_select.wait_for_daemons(timeout=30) + log.debug("%s", status) + target = sorted(target, key=operator.itemgetter('name')) + log.info("target = %s", target) + current = list(status.get_all()) + current = sorted(current, key=operator.itemgetter('name')) + log.info("current = %s", current) + self.assertEqual(len(current), len(target)) + for i in range(len(current)): + for attr in target[i]: + self.assertIn(attr, current[i]) + self.assertEqual(target[i][attr], current[i][attr]) + + def _change_target_state(self, state, name, changes): + for entity in state: + if entity['name'] == name: + for k, v in changes.items(): + entity[k] = v + return + self.fail("no entity") + + def _verify_init(self, fs=None): + fs_select = fs + if fs_select is None: + fs_select = self.fs + status = fs_select.status() + log.info("status = {0}".format(status)) + target = [{'join_fscid': -1, 'name': info['name']} 
for info in status.get_all()] + self._verify_join_fs(target, status=status, fs=fs_select) + return (status, target) + + def _reach_target(self, target): + def takeover(): + try: + self._verify_join_fs(target) + return True + except AssertionError as e: + log.debug("%s", e) + return False + self.wait_until_true(takeover, 30) + + def test_join_fs_runtime(self): + """ + That setting mds_join_fs at runtime affects the cluster layout. + """ + status, target = self._verify_init() + standbys = list(status.get_standbys()) + self.config_set('mds.'+standbys[0]['name'], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0]['name'], {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + + def test_join_fs_unset(self): + """ + That unsetting mds_join_fs will cause failover if another high-affinity standby exists. + """ + status, target = self._verify_init() + standbys = list(status.get_standbys()) + names = (standbys[0]['name'], standbys[1]['name']) + self.config_set('mds.'+names[0], 'mds_join_fs', 'cephfs') + self.config_set('mds.'+names[1], 'mds_join_fs', 'cephfs') + self._change_target_state(target, names[0], {'join_fscid': self.fs.id}) + self._change_target_state(target, names[1], {'join_fscid': self.fs.id}) + self._reach_target(target) + time.sleep(5) # MDSMonitor tick + status = self.fs.wait_for_daemons() + active = self.fs.get_active_names(status=status)[0] + self.assertIn(active, names) + self.config_rm('mds.'+active, 'mds_join_fs') + self._change_target_state(target, active, {'join_fscid': -1}) + new_active = (set(names) - set((active,))).pop() + self._change_target_state(target, new_active, {'state': 'up:active'}) + self._reach_target(target) + + def test_join_fs_drop(self): + """ + That unsetting mds_join_fs will not cause failover if no high-affinity standby exists. + """ + status, target = self._verify_init() + standbys = list(status.get_standbys()) + active = standbys[0]['name'] + self.config_set('mds.'+active, 'mds_join_fs', 'cephfs') + self._change_target_state(target, active, {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + self.config_rm('mds.'+active, 'mds_join_fs') + self._change_target_state(target, active, {'join_fscid': -1}) + self._reach_target(target) + + def test_join_fs_vanilla(self): + """ + That a vanilla standby is preferred over others with mds_join_fs set to another fs. + """ + fs2 = self.mds_cluster.newfs(name="cephfs2") + status, target = self._verify_init() + active = self.fs.get_active_names(status=status)[0] + status2, _ = self._verify_init(fs=fs2) + active2 = fs2.get_active_names(status=status2)[0] + standbys = [info['name'] for info in status.get_standbys()] + victim = standbys.pop() + # Set a bogus fs on the others + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + # The active MDS for cephfs2 will be replaced by the MDS for which + # file system affinity has been set. Also, set the affinity for + # the earlier active MDS so that it is not chosen by the monitors + # as an active MDS for the existing file system. 
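
The affinity checks in this class all funnel through _verify_join_fs, which does a partial-dict match: each target entry only has to agree on the attributes it names. A standalone restatement of that comparison (matches_target is illustrative and not part of the suite; the daemon records in the example are made up):

from operator import itemgetter

def matches_target(target, current):
    """Return True if every daemon in `current` satisfies the partial
    description at the same position in `target` (both sorted by name)."""
    target = sorted(target, key=itemgetter('name'))
    current = sorted(current, key=itemgetter('name'))
    if len(target) != len(current):
        return False
    for want, have in zip(target, current):
        for attr, value in want.items():
            if attr not in have or have[attr] != value:
                return False
    return True

# Example: one standby given affinity for fscid 1, one left vanilla.
assert matches_target(
    [{'name': 'a', 'join_fscid': 1}, {'name': 'b', 'join_fscid': -1}],
    [{'name': 'b', 'join_fscid': -1, 'state': 'up:standby'},
     {'name': 'a', 'join_fscid': 1, 'state': 'up:active'}])
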
+ log.info(f'assigning affinity to cephfs2 for active mds (mds.{active2})') + self.config_set(f'mds.{active2}', 'mds_join_fs', 'cephfs2') + self._change_target_state(target, active2, {'join_fscid': fs2.id}) + self.fs.rank_fail() + self._change_target_state(target, victim, {'state': 'up:active'}) + self._reach_target(target) + status = self.fs.status() + active = self.fs.get_active_names(status=status)[0] + self.assertEqual(active, victim) + + def test_join_fs_last_resort(self): + """ + That a standby with mds_join_fs set to another fs is still used if necessary. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + fs2 = self.mds_cluster.newfs(name="cephfs2") + for mds in standbys: + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + self.fs.rank_fail() + status = self.fs.status() + ranks = list(self.fs.get_ranks(status=status)) + self.assertEqual(len(ranks), 1) + self.assertIn(ranks[0]['name'], standbys) + # Note that we would expect the former active to reclaim its spot, but + # we're not testing that here. + + def test_join_fs_steady(self): + """ + That a sole MDS with mds_join_fs set will come back as active eventually even after failover. + """ + status, target = self._verify_init() + active = self.fs.get_active_names(status=status)[0] + self.config_set('mds.'+active, 'mds_join_fs', 'cephfs') + self._change_target_state(target, active, {'join_fscid': self.fs.id}) + self._reach_target(target) + self.fs.rank_fail() + self._reach_target(target) + + def test_join_fs_standby_replay(self): + """ + That a standby-replay daemon with weak affinity is replaced by a stronger one. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + self.fs.set_allow_standby_replay(True) + status = self.fs.status() + standbys = [info['name'] for info in status.get_standbys()] + self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:standby-replay'}) + self._reach_target(target) + +class TestClusterResize(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 3 + + def test_grow(self): + """ + That the MDS cluster grows after increasing max_mds. + """ + + # Need all my standbys up as well as the active daemons + # self.wait_for_daemon_start() necessary? + + self.fs.grow(2) + self.fs.grow(3) + + + def test_shrink(self): + """ + That the MDS cluster shrinks automatically after decreasing max_mds. + """ + + self.fs.grow(3) + self.fs.shrink(1) + + def test_up_less_than_max(self): + """ + That a health warning is generated when max_mds is greater than active count. 
+ """ + + status = self.fs.status() + mdss = [info['gid'] for info in status.get_all()] + self.fs.set_max_mds(len(mdss)+1) + self.wait_for_health("MDS_UP_LESS_THAN_MAX", 30) + self.fs.shrink(2) + self.wait_for_health_clear(30) + + def test_down_health(self): + """ + That marking a FS down does not generate a health warning + """ + + self.fs.set_down() + try: + self.wait_for_health("", 30) + raise RuntimeError("got health warning?") + except RuntimeError as e: + if "Timed out after" in str(e): + pass + else: + raise + + def test_down_twice(self): + """ + That marking a FS down twice does not wipe old_max_mds. + """ + + self.fs.grow(2) + self.fs.set_down() + self.fs.wait_for_daemons() + self.fs.set_down(False) + self.assertEqual(self.fs.get_var("max_mds"), 2) + self.fs.wait_for_daemons(timeout=60) + + def test_down_grow(self): + """ + That setting max_mds undoes down. + """ + + self.fs.set_down() + self.fs.wait_for_daemons() + self.fs.grow(2) + self.fs.wait_for_daemons() + + def test_down(self): + """ + That down setting toggles and sets max_mds appropriately. + """ + + self.fs.set_down() + self.fs.wait_for_daemons() + self.assertEqual(self.fs.get_var("max_mds"), 0) + self.fs.set_down(False) + self.assertEqual(self.fs.get_var("max_mds"), 1) + self.fs.wait_for_daemons() + self.assertEqual(self.fs.get_var("max_mds"), 1) + + def test_hole(self): + """ + Test that a hole cannot be created in the FS ranks. + """ + + fscid = self.fs.id + + self.fs.grow(2) + + # Now add a delay which should slow down how quickly rank 1 stops + self.config_set('mds', 'ms_inject_delay_max', '5.0') + self.config_set('mds', 'ms_inject_delay_probability', '1.0') + self.fs.set_max_mds(1) + log.info("status = {0}".format(self.fs.status())) + + # Don't wait for rank 1 to stop + self.fs.set_max_mds(3) + log.info("status = {0}".format(self.fs.status())) + + # Now check that the mons didn't try to promote a standby to rank 2 + self.fs.set_max_mds(2) + status = self.fs.status() + try: + status = self.fs.wait_for_daemons(timeout=90) + ranks = set([info['rank'] for info in status.get_ranks(fscid)]) + self.assertEqual(ranks, set([0, 1])) + finally: + log.info("status = {0}".format(status)) + + def test_thrash(self): + """ + Test that thrashing max_mds does not fail. + """ + + max_mds = 2 + for i in range(0, 100): + self.fs.set_max_mds(max_mds) + max_mds = (max_mds+1)%3+1 + + self.fs.wait_for_daemons(timeout=90) + +class TestFailover(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_repeated_boot(self): + """ + That multiple boot messages do not result in the MDS getting evicted. + """ + + interval = 10 + self.config_set("mon", "paxos_propose_interval", interval) + + mds = choice(list(self.fs.status().get_all())) + + with self.assert_cluster_log(f"daemon mds.{mds['name']} restarted", present=False): + # Avoid a beacon to the monitors with down:dne by restarting: + self.fs.mds_fail(mds_id=mds['name']) + # `ceph mds fail` won't return until the FSMap is committed, double-check: + self.assertIsNone(self.fs.status().get_mds_gid(mds['gid'])) + time.sleep(2) # for mds to restart and accept asok commands + status1 = self.fs.mds_asok(['status'], mds_id=mds['name']) + time.sleep(interval*1.5) + status2 = self.fs.mds_asok(['status'], mds_id=mds['name']) + self.assertEqual(status1['id'], status2['id']) + + def test_simple(self): + """ + That when the active MDS is killed, a standby MDS is promoted into + its rank after the grace period. 
+ + This is just a simple unit test, the harder cases are covered + in thrashing tests. + """ + + (original_active, ) = self.fs.get_active_names() + original_standbys = self.mds_cluster.get_standby_daemons() + + # Kill the rank 0 daemon's physical process + self.fs.mds_stop(original_active) + + # Wait until the monitor promotes his replacement + def promoted(): + ranks = list(self.fs.get_ranks()) + return len(ranks) > 0 and ranks[0]['name'] in original_standbys + + log.info("Waiting for promotion of one of the original standbys {0}".format( + original_standbys)) + self.wait_until_true(promoted, timeout=self.fs.beacon_timeout) + + # Start the original rank 0 daemon up again, see that he becomes a standby + self.fs.mds_restart(original_active) + self.wait_until_true( + lambda: original_active in self.mds_cluster.get_standby_daemons(), + timeout=60 # Approximately long enough for MDS to start and mon to notice + ) + + def test_client_abort(self): + """ + That a client will respect fuse_require_active_mds and error out + when the cluster appears to be unavailable. + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to inject client metadata") + + require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true" + if not require_active: + self.skipTest("fuse_require_active_mds is not set") + + # Check it's not laggy to begin with + (original_active, ) = self.fs.get_active_names() + self.assertNotIn("laggy_since", self.fs.status().get_mds(original_active)) + + self.mounts[0].umount_wait() + + # Control: that we can mount and unmount usually, while the cluster is healthy + self.mounts[0].mount_wait() + self.mounts[0].umount_wait() + + # Stop the daemon processes + self.fs.mds_stop() + + # Wait for everyone to go laggy + def laggy(): + mdsmap = self.fs.get_mds_map() + for info in mdsmap['info'].values(): + if "laggy_since" not in info: + return False + + return True + + self.wait_until_true(laggy, self.fs.beacon_timeout) + with self.assertRaises(CommandFailedError): + self.mounts[0].mount_wait() + + def test_standby_count_wanted(self): + """ + That cluster health warnings are generated by insufficient standbys available. 
+ """ + + # Need all my standbys up as well as the active daemons + self.wait_for_daemon_start() + + standbys = self.mds_cluster.get_standby_daemons() + self.assertGreaterEqual(len(standbys), 1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) + + # Kill a standby and check for warning + victim = standbys.pop() + self.fs.mds_stop(victim) + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout) + + # restart the standby, see that he becomes a standby, check health clears + self.fs.mds_restart(victim) + self.wait_until_true( + lambda: victim in self.mds_cluster.get_standby_daemons(), + timeout=60 # Approximately long enough for MDS to start and mon to notice + ) + self.wait_for_health_clear(timeout=30) + + # Set it one greater than standbys ever seen + standbys = self.mds_cluster.get_standby_daemons() + self.assertGreaterEqual(len(standbys), 1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout) + + # Set it to 0 + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') + self.wait_for_health_clear(timeout=30) + + def test_discontinuous_mdsmap(self): + """ + That discontinuous mdsmap does not affect failover. + See http://tracker.ceph.com/issues/24856. + """ + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.umount_wait() + + monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds")) + + mds_0 = self.fs.get_rank(rank=0, status=status) + self.fs.rank_freeze(True, rank=0) # prevent failover + self.fs.rank_signal(signal.SIGSTOP, rank=0, status=status) + self.wait_until_true( + lambda: "laggy_since" in self.fs.get_rank(), + timeout=self.fs.beacon_timeout + ) + + self.fs.rank_fail(rank=1) + self.fs.wait_for_state('up:resolve', rank=1, timeout=30) + + # Make sure of mds_0's monitor connection gets reset + time.sleep(monc_timeout * 2) + + # Continue rank 0, it will get discontinuous mdsmap + self.fs.rank_signal(signal.SIGCONT, rank=0) + self.wait_until_true( + lambda: "laggy_since" not in self.fs.get_rank(rank=0), + timeout=self.fs.beacon_timeout + ) + + # mds.b will be stuck at 'reconnect' state if snapserver gets confused + # by discontinuous mdsmap + self.fs.wait_for_state('up:active', rank=1, timeout=30) + self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid']) + self.fs.rank_freeze(False, rank=0) + + def test_connect_bootstrapping(self): + self.config_set("mds", "mds_sleep_rank_change", 10000000.0) + self.config_set("mds", "mds_connect_bootstrapping", True) + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + self.fs.rank_fail(rank=0) + # rank 0 will get stuck in up:resolve, see https://tracker.ceph.com/issues/53194 + self.fs.wait_for_daemons() + + +class TestStandbyReplay(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 4 + + def _confirm_no_replay(self): + status = self.fs.status() + _ = len(list(status.get_standbys())) + self.assertEqual(0, len(list(self.fs.get_replays(status=status)))) + return status + + def _confirm_single_replay(self, full=True, status=None, retries=3): + status = self.fs.wait_for_daemons(status=status) + ranks = sorted(self.fs.get_mds_map(status=status)['in']) + replays = list(self.fs.get_replays(status=status)) + checked_replays = set() + for rank in ranks: + has_replay = False + for replay in replays: + if replay['rank'] == rank: + 
self.assertFalse(has_replay) + has_replay = True + checked_replays.add(replay['gid']) + if full and not has_replay: + if retries <= 0: + raise RuntimeError("rank "+str(rank)+" has no standby-replay follower") + else: + retries = retries-1 + time.sleep(2) + self.assertEqual(checked_replays, set(info['gid'] for info in replays)) + return status + + def _check_replay_takeover(self, status, rank=0): + replay = self.fs.get_replay(rank=rank, status=status) + new_status = self.fs.wait_for_daemons() + new_active = self.fs.get_rank(rank=rank, status=new_status) + if replay: + self.assertEqual(replay['gid'], new_active['gid']) + else: + # double check takeover came from a standby (or some new daemon via restart) + found = False + for info in status.get_standbys(): + if info['gid'] == new_active['gid']: + found = True + break + if not found: + for info in status.get_all(): + self.assertNotEqual(info['gid'], new_active['gid']) + return new_status + + def test_standby_replay_singleton(self): + """ + That only one MDS becomes standby-replay. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + time.sleep(30) + self._confirm_single_replay() + + def test_standby_replay_damaged(self): + """ + That a standby-replay daemon can cause the rank to go damaged correctly. + """ + + self._confirm_no_replay() + self.config_set("mds", "mds_standby_replay_damaged", True) + self.fs.set_allow_standby_replay(True) + self.wait_until_true( + lambda: len(self.fs.get_damaged()) > 0, + timeout=30 + ) + status = self.fs.status() + self.assertListEqual([], list(self.fs.get_ranks(status=status))) + self.assertListEqual([0], self.fs.get_damaged(status=status)) + + def test_standby_replay_disable(self): + """ + That turning off allow_standby_replay fails all standby-replay daemons. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + time.sleep(30) + self._confirm_single_replay() + self.fs.set_allow_standby_replay(False) + self._confirm_no_replay() + + def test_standby_replay_singleton_fail(self): + """ + That failures don't violate singleton constraint. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + for i in range(10): + time.sleep(randint(1, 5)) + self.fs.rank_restart(status=status) + status = self._check_replay_takeover(status) + status = self._confirm_single_replay(status=status) + + for i in range(10): + time.sleep(randint(1, 5)) + self.fs.rank_fail() + status = self._check_replay_takeover(status) + status = self._confirm_single_replay(status=status) + + def test_standby_replay_singleton_fail_multimds(self): + """ + That failures don't violate singleton constraint with multiple actives. + """ + + status = self._confirm_no_replay() + new_max_mds = randint(2, len(list(status.get_standbys()))) + self.fs.set_max_mds(new_max_mds) + self.fs.wait_for_daemons() # wait for actives to come online! 
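
The restart/fail loops below keep re-asserting the same invariant that _confirm_single_replay encodes: every active rank has at most one standby-replay follower, and exactly one when a full check is requested. A distilled sketch of that invariant (check_singleton_replay is illustrative; the gid values in the example are arbitrary sample data):

from collections import Counter

def check_singleton_replay(ranks, replays, full=True):
    """`replays` is assumed to be a list of dicts with 'rank' and 'gid'
    keys, as returned by get_replays() above."""
    per_rank = Counter(r['rank'] for r in replays)
    for rank, count in per_rank.items():
        if count > 1:
            raise AssertionError(f"rank {rank} has {count} standby-replay daemons")
    if full:
        missing = [rank for rank in ranks if per_rank[rank] == 0]
        if missing:
            raise AssertionError(f"ranks {missing} have no standby-replay follower")

# Two actives, each with one follower: passes.
check_singleton_replay([0, 1], [{'rank': 0, 'gid': 4101}, {'rank': 1, 'gid': 4102}])
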
+ self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay(full=False) + + for i in range(10): + time.sleep(randint(1, 5)) + victim = randint(0, new_max_mds-1) + self.fs.rank_restart(rank=victim, status=status) + status = self._check_replay_takeover(status, rank=victim) + status = self._confirm_single_replay(status=status, full=False) + + for i in range(10): + time.sleep(randint(1, 5)) + victim = randint(0, new_max_mds-1) + self.fs.rank_fail(rank=victim) + status = self._check_replay_takeover(status, rank=victim) + status = self._confirm_single_replay(status=status, full=False) + + def test_standby_replay_failure(self): + """ + That the failure of a standby-replay daemon happens cleanly + and doesn't interrupt anything else. + """ + + status = self._confirm_no_replay() + self.fs.set_max_mds(1) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + for i in range(10): + time.sleep(randint(1, 5)) + victim = self.fs.get_replay(status=status) + self.fs.mds_restart(mds_id=victim['name']) + status = self._confirm_single_replay(status=status) + + def test_standby_replay_prepare_beacon(self): + """ + That a MDSMonitor::prepare_beacon handles standby-replay daemons + correctly without removing the standby. (Note, usually a standby-replay + beacon will just be replied to by MDSMonitor::preprocess_beacon.) + """ + + status = self._confirm_no_replay() + self.fs.set_max_mds(1) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + replays = list(status.get_replays(self.fs.id)) + self.assertEqual(len(replays), 1) + self.config_set('mds.'+replays[0]['name'], 'mds_inject_health_dummy', True) + time.sleep(10) # for something not to happen... + status = self._confirm_single_replay() + replays2 = list(status.get_replays(self.fs.id)) + self.assertEqual(replays[0]['gid'], replays2[0]['gid']) + + def test_rank_stopped(self): + """ + That when a rank is STOPPED, standby replays for + that rank get torn down + """ + + status = self._confirm_no_replay() + standby_count = len(list(status.get_standbys())) + self.fs.set_max_mds(2) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + self.fs.set_max_mds(1) # stop rank 1 + + status = self._confirm_single_replay() + self.assertTrue(standby_count, len(list(status.get_standbys()))) + + +class TestMultiFilesystems(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 4 + + # We'll create our own filesystems and start our own daemons + REQUIRE_FILESYSTEM = False + + def setUp(self): + super(TestMultiFilesystems, self).setUp() + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", + "--yes-i-really-mean-it") + + def _setup_two(self): + fs_a = self.mds_cluster.newfs(name="alpha") + fs_b = self.mds_cluster.newfs(name="bravo") + + self.mds_cluster.mds_restart() + + # Wait for both filesystems to go healthy + fs_a.wait_for_daemons() + fs_b.wait_for_daemons() + + # Reconfigure client auth caps + for mount in self.mounts: + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(mount.client_id), + 'mds', 'allow', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + fs_a.get_data_pool_name(), fs_b.get_data_pool_name())) + + return fs_a, fs_b + + def test_clients(self): + fs_a, fs_b = self._setup_two() + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + 
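
The data_objects_present() checks further down boil down to looking in each pool for RADOS objects named after the file's inode, using the same "{ino:x}.00000000" scheme seen elsewhere in these suites. A rough sketch of that naming, assuming the default 4 MiB object size (data_object_names is illustrative only):

def data_object_names(ino, size_bytes, object_size=4 * 1024 * 1024):
    """Yield the RADOS object names backing a file of `size_bytes` bytes."""
    nobjects = max(1, (size_bytes + object_size - 1) // object_size)
    for i in range(nobjects):
        yield "{0:x}.{1:08x}".format(ino, i)

# A 2 MiB file occupies a single object, e.g. "10000000001.00000000".
print(list(data_object_names(0x10000000001, 2 * 1024 * 1024)))
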
a_created_ino = self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + b_created_ino = self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + # Check that a non-default filesystem mount survives an MDS + # failover (i.e. that map subscription is continuous, not + # just the first time), reproduces #16022 + old_fs_b_mds = fs_b.get_active_names()[0] + self.mds_cluster.mds_stop(old_fs_b_mds) + self.mds_cluster.mds_fail(old_fs_b_mds) + fs_b.wait_for_daemons() + background = self.mount_b.write_background() + # Raise exception if the write doesn't finish (i.e. if client + # has not kept up with MDS failure) + try: + self.wait_until_true(lambda: background.finished, timeout=30) + except RuntimeError: + # The mount is stuck, we'll have to force it to fail cleanly + background.stdin.close() + self.mount_b.umount_wait(force=True) + raise + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # See that the client's files went into the correct pool + self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024)) + self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024)) + + def test_standby(self): + fs_a, fs_b = self._setup_two() + + # Assert that the remaining two MDS daemons are now standbys + a_daemons = fs_a.get_active_names() + b_daemons = fs_b.get_active_names() + self.assertEqual(len(a_daemons), 1) + self.assertEqual(len(b_daemons), 1) + original_a = a_daemons[0] + original_b = b_daemons[0] + expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons)) + + # Need all my standbys up as well as the active daemons + self.wait_for_daemon_start() + self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons()) + + # Kill fs_a's active MDS, see a standby take over + self.mds_cluster.mds_stop(original_a) + self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30, + reject_fn=lambda v: v > 1) + # Assert that it's a *different* daemon that has now appeared in the map for fs_a + self.assertNotEqual(fs_a.get_active_names()[0], original_a) + + # Kill fs_b's active MDS, see a standby take over + self.mds_cluster.mds_stop(original_b) + self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, + reject_fn=lambda v: v > 1) + # Assert that it's a *different* daemon that has now appeared in the map for fs_a + self.assertNotEqual(fs_b.get_active_names()[0], original_b) + + # Both of the original active daemons should be gone, and all standbys used up + self.assertEqual(self.mds_cluster.get_standby_daemons(), set()) + + # Restart the ones I killed, see them reappear as standbys + self.mds_cluster.mds_restart(original_a) + self.mds_cluster.mds_restart(original_b) + self.wait_until_true( + lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(), + timeout=30 + ) + + def test_grow_shrink(self): + # Usual setup... 
+ fs_a, fs_b = self._setup_two() + + # Increase max_mds on fs_b, see a standby take up the role + fs_b.set_max_mds(2) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Increase max_mds on fs_a, see a standby take up the role + fs_a.set_max_mds(2) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Shrink fs_b back to 1, see a daemon go back to standby + fs_b.set_max_mds(1) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Grow fs_a up to 3, see the former fs_b daemon join it. + fs_a.set_max_mds(3) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60, + reject_fn=lambda v: v > 3 or v < 2) diff --git a/qa/tasks/cephfs/test_flush.py b/qa/tasks/cephfs/test_flush.py new file mode 100644 index 000000000..17cb84970 --- /dev/null +++ b/qa/tasks/cephfs/test_flush.py @@ -0,0 +1,112 @@ + +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO + + +class TestFlush(CephFSTestCase): + def test_flush(self): + self.mount_a.run_shell(["mkdir", "mydir"]) + self.mount_a.run_shell(["touch", "mydir/alpha"]) + dir_ino = self.mount_a.path_to_ino("mydir") + file_ino = self.mount_a.path_to_ino("mydir/alpha") + + # Unmount the client so that it isn't still holding caps + self.mount_a.umount_wait() + + # Before flush, the dirfrag object does not exist + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + + # Before flush, the file's backtrace has not been written + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + + # Before flush, there are no dentries in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute flush + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # After flush, the dirfrag object has been created + dir_list = self.fs.list_dirfrag(dir_ino) + self.assertEqual(dir_list, ["alpha_head"]) + + # And the 'mydir' dentry is in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head']) + + # ...and the data object has its backtrace + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']]) + self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']]) + self.assertEqual(file_ino, backtrace['ino']) + + # ...and the journal is truncated to just a single subtreemap from the + # newly created segment + summary_output = self.fs.journal_tool(["event", "get", "summary"], 0) + try: + self.assertEqual(summary_output, + dedent( + """ + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + except AssertionError: + # In some states, flushing the journal will leave you + # an extra event from locks a client held. This is + # correct behaviour: the MDS is flushing the journal, + # it's just that new events are getting added too. + # In this case, we should nevertheless see a fully + # empty journal after a second flush. 
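
The journal summaries compared in this test are plain text. When a comparison fails it can be easier to reason about the counts than the raw string; a minimal sketch that parses the "Events by type:" layout shown here into a dict (parse_event_summary is illustrative, not part of the suite):

def parse_event_summary(text):
    counts = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.endswith(':'):
            continue  # skip the "Events by type:" header
        event, _, count = line.partition(':')
        counts[event.strip()] = int(count)
    return counts

# -> {'SUBTREEMAP': 1, 'UPDATE': 1, 'Errors': 0}
print(parse_event_summary("Events by type:\n  SUBTREEMAP: 1\n  UPDATE: 1\nErrors: 0"))
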
+ self.assertEqual(summary_output, + dedent( + """ + Events by type: + SUBTREEMAP: 1 + UPDATE: 1 + Errors: 0 + """ + ).strip()) + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0), + dedent( + """ + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + + # Now for deletion! + # We will count the RADOS deletions and MDS file purges, to verify that + # the expected behaviour is happening as a result of the purge + initial_dels = self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] + initial_purges = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] + + # Use a client to delete a file + self.mount_a.mount_wait() + self.mount_a.run_shell(["rm", "-rf", "mydir"]) + + # Flush the journal so that the directory inode can be purged + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # We expect to see a single file purge + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - initial_purges >= 2, + 60) + + # We expect two deletions, one of the dirfrag and one of the backtrace + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_dels >= 2, + 60) # timeout is fairly long to allow for tick+rados latencies + + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py new file mode 100644 index 000000000..f3cec881b --- /dev/null +++ b/qa/tasks/cephfs/test_forward_scrub.py @@ -0,0 +1,307 @@ + +""" +Test that the forward scrub functionality can traverse metadata and apply +requested tags, on well formed metadata. + +This is *not* the real testing for forward scrub, which will need to test +how the functionality responds to damaged metadata. 
+ +""" +import logging +import json + +from collections import namedtuple +from io import BytesIO +from textwrap import dedent + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +import struct + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class TestForwardScrub(CephFSTestCase): + MDSS_REQUIRED = 1 + + def _read_str_xattr(self, pool, obj, attr): + """ + Read a ceph-encoded string from a rados xattr + """ + output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool, + stdout=BytesIO()).stdout.getvalue() + strlen = struct.unpack('i', output[0:4])[0] + return output[4:(4 + strlen)].decode(encoding='ascii') + + def _get_paths_to_ino(self): + inos = {} + p = self.mount_a.run_shell(["find", "./"]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos[path] = self.mount_a.path_to_ino(path) + + return inos + + def test_apply_tag(self): + self.mount_a.run_shell(["mkdir", "parentdir"]) + self.mount_a.run_shell(["mkdir", "parentdir/childdir"]) + self.mount_a.run_shell(["touch", "rfile"]) + self.mount_a.run_shell(["touch", "parentdir/pfile"]) + self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"]) + + # Build a structure mapping path to inode, as we will later want + # to check object by object and objects are named after ino number + inos = self._get_paths_to_ino() + + # Flush metadata: this is a friendly test of forward scrub so we're skipping + # the part where it's meant to cope with dirty metadata + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + tag = "mytag" + + # Execute tagging forward scrub + self.fs.mds_asok(["tag", "path", "/parentdir", tag]) + # Wait for completion + import time + time.sleep(10) + # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll + # watch that instead + + # Check that dirs were tagged + for dirpath in ["./parentdir", "./parentdir/childdir"]: + self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name()) + + # Check that files were tagged + for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]: + self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name()) + + # This guy wasn't in the tag path, shouldn't have been tagged + self.assertUntagged(inos["./rfile"]) + + def assertUntagged(self, ino): + file_obj_name = "{0:x}.00000000".format(ino) + with self.assertRaises(CommandFailedError): + self._read_str_xattr( + self.fs.get_data_pool_name(), + file_obj_name, + "scrub_tag" + ) + + def assertTagged(self, ino, tag, pool): + file_obj_name = "{0:x}.00000000".format(ino) + wrote = self._read_str_xattr( + pool, + file_obj_name, + "scrub_tag" + ) + self.assertEqual(wrote, tag) + + def _validate_linkage(self, expected): + inos = self._get_paths_to_ino() + try: + self.assertDictEqual(inos, expected) + except AssertionError: + log.error("Expected: {0}".format(json.dumps(expected, indent=2))) + log.error("Actual: {0}".format(json.dumps(inos, indent=2))) + raise + + def test_orphan_scan(self): + # Create some files whose metadata we will flush + self.mount_a.run_python(dedent(""" + import os + mount_point = "{mount_point}" + parent = os.path.join(mount_point, "parent") + os.mkdir(parent) + flushed = os.path.join(parent, "flushed") + os.mkdir(flushed) + for f in ["alpha", "bravo", "charlie"]: + open(os.path.join(flushed, f), 'w').write(f) + """.format(mount_point=self.mount_a.mountpoint))) + + inos = 
self._get_paths_to_ino() + + # Flush journal + # Umount before flush to avoid cap releases putting + # things we don't want in the journal later. + self.mount_a.umount_wait() + self.fs.flush() + + # Create a new inode that's just in the log, i.e. would + # look orphaned to backward scan if backward scan wisnae + # respectin' tha scrub_tag xattr. + self.mount_a.mount_wait() + self.mount_a.run_shell(["mkdir", "parent/unflushed"]) + self.mount_a.run_shell(["dd", "if=/dev/urandom", + "of=./parent/unflushed/jfile", + "bs=1M", "count=8"]) + inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed") + inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile") + self.mount_a.umount_wait() + + # Orphan an inode by deleting its dentry + # Our victim will be.... bravo. + self.mount_a.umount_wait() + self.fs.fail() + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"]) + self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"]) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + # See that the orphaned file is indeed missing from a client's POV + self.mount_a.mount_wait() + damaged_state = self._get_paths_to_ino() + self.assertNotIn("./parent/flushed/bravo", damaged_state) + self.mount_a.umount_wait() + + # Run a tagging forward scrub + tag = "mytag123" + self.fs.rank_asok(["tag", "path", "/parent", tag]) + + # See that the orphan wisnae tagged + self.assertUntagged(inos['./parent/flushed/bravo']) + + # See that the flushed-metadata-and-still-present files are tagged + self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name()) + self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name()) + + # See that journalled-but-not-flushed file *was* tagged + self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name()) + + # okay, now we are going to run cephfs-data-scan. It's necessary to + # have a clean journal otherwise replay will blowup on mismatched + # inotable versions (due to scan_links) + self.fs.flush() + self.fs.fail() + self.fs.journal_tool(["journal", "reset", "--force"], 0) + + # Run cephfs-data-scan targeting only orphans + self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) + self.fs.data_scan([ + "scan_inodes", + "--filter-tag", tag, + self.fs.get_data_pool_name() + ]) + self.fs.data_scan(["scan_links"]) + + # After in-place injection stats should be kosher again + self.fs.set_ceph_conf('mds', 'mds verify scatter', True) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True) + + # And we should have all the same linkage we started with, + # and no lost+found, and no extra inodes! 
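
The assertTagged/assertUntagged checks in this test rely on _read_str_xattr, which decodes the scrub_tag xattr as a 32-bit length prefix followed by the string bytes. A round-trip sketch of that encoding, using the same native-endian 'i' prefix the helper assumes (encode/decode names are illustrative):

import struct

def encode_ceph_string(s):
    data = s.encode('ascii')
    return struct.pack('i', len(data)) + data

def decode_ceph_string(buf):
    strlen = struct.unpack('i', buf[0:4])[0]
    return buf[4:4 + strlen].decode('ascii')

assert decode_ceph_string(encode_ceph_string("mytag123")) == "mytag123"
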
+ self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + self._validate_linkage(inos) + + def _stash_inotable(self): + # Get all active ranks + ranks = self.fs.get_all_mds_rank() + + inotable_dict = {} + for rank in ranks: + inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable" + print("Trying to fetch inotable object: " + inotable_oid) + + #self.fs.get_metadata_object("InoTable", "mds0_inotable") + inotable_raw = self.fs.radosmo(['get', inotable_oid, '-']) + inotable_dict[inotable_oid] = inotable_raw + return inotable_dict + + def test_inotable_sync(self): + self.mount_a.write_n_mb("file1_sixmegs", 6) + + # Flush journal + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + inotable_copy = self._stash_inotable() + + self.mount_a.mount_wait() + + self.mount_a.write_n_mb("file2_sixmegs", 6) + self.mount_a.write_n_mb("file3_sixmegs", 6) + + inos = self._get_paths_to_ino() + + # Flush journal + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + self.mount_a.umount_wait() + + with self.assert_cluster_log("inode table repaired", invert_match=True): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.fail() + + # Truncate the journal (to ensure the inotable on disk + # is all that will be in the InoTable in memory) + + self.fs.journal_tool(["event", "splice", + "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0) + + self.fs.journal_tool(["event", "splice", + "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0) + + # Revert to old inotable. + for key, value in inotable_copy.items(): + self.fs.radosm(["put", key, "-"], stdin=BytesIO(value)) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + with self.assert_cluster_log("inode table repaired"): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.fail() + table_text = self.fs.table_tool(["0", "show", "inode"]) + table = json.loads(table_text) + self.assertGreater( + table['0']['data']['inotable']['free'][0]['start'], + inos['./file3_sixmegs']) + + def test_backtrace_repair(self): + """ + That the MDS can repair an inodes backtrace in the data pool + if it is found to be damaged. 
+ """ + # Create a file for subsequent checks + self.mount_a.run_shell(["mkdir", "parent_a"]) + self.mount_a.run_shell(["touch", "parent_a/alpha"]) + file_ino = self.mount_a.path_to_ino("parent_a/alpha") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], + [a['dname'] for a in backtrace['ancestors']]) + + # Go corrupt the backtrace + self.fs._write_data_xattr(file_ino, "parent", + "oh i'm sorry did i overwrite your xattr?") + + with self.assert_cluster_log("bad backtrace on inode"): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], + [a['dname'] for a in backtrace['ancestors']]) diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py new file mode 100644 index 000000000..7d35ec0df --- /dev/null +++ b/qa/tasks/cephfs/test_fragment.py @@ -0,0 +1,359 @@ +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra import run + +import os +import time +import logging +log = logging.getLogger(__name__) + + +class TestFragmentation(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def get_splits(self): + return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split'] + + def get_merges(self): + return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge'] + + def get_dir_ino(self, path): + dir_cache = self.fs.read_cache(path, 0) + dir_ino = None + dir_inono = self.mount_a.path_to_ino(path.strip("/")) + for ino in dir_cache: + if ino['ino'] == dir_inono: + dir_ino = ino + break + self.assertIsNotNone(dir_ino) + return dir_ino + + def _configure(self, **kwargs): + """ + Apply kwargs as MDS configuration settings, enable dirfrags + and restart the MDSs. + """ + + for k, v in kwargs.items(): + self.ceph_cluster.set_ceph_conf("mds", k, v.__str__()) + + self.mds_cluster.mds_fail_restart() + self.fs.wait_for_daemons() + + def test_oversize(self): + """ + That a directory is split when it becomes too large. + """ + + split_size = 20 + merge_size = 5 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=1 + ) + + self.assertEqual(self.get_splits(), 0) + + self.mount_a.create_n_files("splitdir/file", split_size + 1) + + self.wait_until_true( + lambda: self.get_splits() == 1, + timeout=30 + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), 2) + self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*") + self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*") + self.assertEqual( + sum([len(f['dentries']) for f in frags]), + split_size + 1 + ) + + self.assertEqual(self.get_merges(), 0) + + self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")]) + + self.wait_until_true( + lambda: self.get_merges() == 1, + timeout=30 + ) + + self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 1) + + def test_rapid_creation(self): + """ + That the fast-splitting limit of 1.5x normal limit is + applied when creating dentries quickly. 
+ """ + + split_size = 100 + merge_size = 1 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=3, + mds_bal_fragment_size_max=int(split_size * 1.5 + 2) + ) + + # We test this only at a single split level. If a client was sending + # IO so fast that it hit a second split before the first split + # was complete, it could violate mds_bal_fragment_size_max -- there + # is a window where the child dirfrags of a split are unfrozen + # (so they can grow), but still have STATE_FRAGMENTING (so they + # can't be split). + + # By writing 4x the split size when the split bits are set + # to 3 (i.e. 4-ways), I am reasonably sure to see precisely + # one split. The test is to check whether that split + # happens soon enough that the client doesn't exceed + # 2x the split_size (the "immediate" split mode should + # kick in at 1.5x the split size). + + self.assertEqual(self.get_splits(), 0) + self.mount_a.create_n_files("splitdir/file", split_size * 4) + self.wait_until_equal( + self.get_splits, + 1, + reject_fn=lambda s: s > 1, + timeout=30 + ) + + def test_deep_split(self): + """ + That when the directory grows many times larger than split size, + the fragments get split again. + """ + + split_size = 100 + merge_size = 1 # i.e. don't merge frag unless its empty + split_bits = 1 + + branch_factor = 2**split_bits + + # Arbitrary: how many levels shall we try fragmenting before + # ending the test? + max_depth = 5 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=split_bits + ) + + # Each iteration we will create another level of fragments. The + # placement of dentries into fragments is by hashes (i.e. pseudo + # random), so we rely on statistics to get the behaviour that + # by writing about 1.5x as many dentries as the split_size times + # the number of frags, we will get them all to exceed their + # split size and trigger a split. + depth = 0 + files_written = 0 + splits_expected = 0 + while depth < max_depth: + log.info("Writing files for depth {0}".format(depth)) + target_files = branch_factor**depth * int(split_size * 1.5) + create_files = target_files - files_written + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "{0} Writing {1} files (depth={2})".format( + self.__class__.__name__, create_files, depth + )) + self.mount_a.create_n_files("splitdir/file_{0}".format(depth), + create_files) + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "{0} Done".format(self.__class__.__name__)) + + files_written += create_files + log.info("Now have {0} files".format(files_written)) + + splits_expected += branch_factor**depth + log.info("Waiting to see {0} splits".format(splits_expected)) + try: + self.wait_until_equal( + self.get_splits, + splits_expected, + timeout=30, + reject_fn=lambda x: x > splits_expected + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), branch_factor**(depth+1)) + self.assertEqual( + sum([len(f['dentries']) for f in frags]), + target_files + ) + except: + # On failures, log what fragmentation we actually ended + # up with. This block is just for logging, at the end + # we raise the exception again. 
+ frags = self.get_dir_ino("/splitdir")['dirfrags'] + log.info("depth={0} splits_expected={1} files_written={2}".format( + depth, splits_expected, files_written + )) + log.info("Dirfrags:") + for f in frags: + log.info("{0}: {1}".format( + f['dirfrag'], len(f['dentries']) + )) + raise + + depth += 1 + + # Remember the inode number because we will be checking for + # objects later. + dir_inode_no = self.mount_a.path_to_ino("splitdir") + + self.mount_a.run_shell(["rm", "-rf", "splitdir/"]) + self.mount_a.umount_wait() + + self.fs.mds_asok(['flush', 'journal']) + + def _check_pq_finished(): + num_strays = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['num_strays'] + pq_ops = self.fs.mds_asok(['perf', 'dump', 'purge_queue'])['purge_queue']['pq_executing'] + return num_strays == 0 and pq_ops == 0 + + # Wait for all strays to purge + self.wait_until_true( + lambda: _check_pq_finished(), + timeout=1200 + ) + # Check that the metadata pool objects for all the myriad + # child fragments are gone + metadata_objs = self.fs.radosmo(["ls"], stdout=StringIO()).strip() + frag_objs = [] + for o in metadata_objs.split("\n"): + if o.startswith("{0:x}.".format(dir_inode_no)): + frag_objs.append(o) + self.assertListEqual(frag_objs, []) + + def test_split_straydir(self): + """ + That stray dir is split when it becomes too large. + """ + def _count_fragmented(): + mdsdir_cache = self.fs.read_cache("~mdsdir", 1) + num = 0 + for ino in mdsdir_cache: + if ino["ino"] == 0x100: + continue + if len(ino["dirfrags"]) > 1: + log.info("straydir 0x{:X} is fragmented".format(ino["ino"])) + num += 1; + return num + + split_size = 50 + merge_size = 5 + split_bits = 1 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=split_bits, + mds_bal_fragment_size_max=(split_size * 100) + ) + + # manually split/merge + self.assertEqual(_count_fragmented(), 0) + self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray8", "0/0", "1"]) + self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray9", "0/0", "1"]) + self.wait_until_true( + lambda: _count_fragmented() == 2, + timeout=30 + ) + + time.sleep(30) + + self.fs.mds_asok(["dirfrag", "merge", "~mdsdir/stray8", "0/0"]) + self.wait_until_true( + lambda: _count_fragmented() == 1, + timeout=30 + ) + + time.sleep(30) + + # auto merge + + # merging stray dirs is driven by MDCache::advance_stray() + # advance stray dir 10 times + for _ in range(10): + self.fs.mds_asok(['flush', 'journal']) + + self.wait_until_true( + lambda: _count_fragmented() == 0, + timeout=30 + ) + + # auto split + + # there are 10 stray dirs. advance stray dir 20 times + self.mount_a.create_n_files("testdir1/file", split_size * 20) + self.mount_a.run_shell(["mkdir", "testdir2"]) + testdir1_path = os.path.join(self.mount_a.mountpoint, "testdir1") + for i in self.mount_a.ls(testdir1_path): + self.mount_a.run_shell(["ln", "testdir1/{0}".format(i), "testdir2/"]) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + self.mount_a.wait_until_mounted() + + # flush journal and restart mds. after restart, testdir2 is not in mds' cache + self.fs.mds_asok(['flush', 'journal']) + self.mds_cluster.mds_fail_restart() + self.fs.wait_for_daemons() + # splitting stray dirs is driven by MDCache::advance_stray() + # advance stray dir after unlink 'split_size' files. 
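
The checks that follow (and most of this file) lean on wait_until_true(), which is essentially poll-with-timeout. A minimal sketch of that pattern; the real helper lives in the CephFSTestCase base class and may differ in its retry interval and error reporting:

import time

def wait_until_true(condition, timeout, period=1):
    """Poll `condition` every `period` seconds until it returns True,
    raising RuntimeError if `timeout` seconds elapse first."""
    elapsed = 0
    while not condition():
        if elapsed >= timeout:
            raise RuntimeError(f"timed out after {elapsed}s waiting for condition")
        time.sleep(period)
        elapsed += period
    return True

# e.g. wait_until_true(lambda: _count_fragmented() > 0, timeout=30)
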
+ self.fs.mds_asok(['config', 'set', 'mds_log_events_per_segment', str(split_size)]) + + self.assertEqual(_count_fragmented(), 0) + self.mount_a.run_shell(["rm", "-rf", "testdir1"]) + self.wait_until_true( + lambda: _count_fragmented() > 0, + timeout=30 + ) + + def test_dir_merge_with_snap_items(self): + """ + That directory remain fragmented when snapshot items are taken into account. + """ + split_size = 1000 + merge_size = 100 + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=1 + ) + + # split the dir + create_files = split_size + 50 + self.mount_a.create_n_files("splitdir/file_", create_files) + + self.wait_until_true( + lambda: self.get_splits() == 1, + timeout=30 + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), 2) + self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*") + self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*") + self.assertEqual( + sum([len(f['dentries']) for f in frags]), create_files + ) + + self.assertEqual(self.get_merges(), 0) + + self.mount_a.run_shell(["mkdir", "splitdir/.snap/snap_a"]) + self.mount_a.run_shell(["mkdir", "splitdir/.snap/snap_b"]) + self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")]) + + time.sleep(30) + + self.assertEqual(self.get_merges(), 0) + self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 2) diff --git a/qa/tasks/cephfs/test_fscrypt.py b/qa/tasks/cephfs/test_fscrypt.py new file mode 100644 index 000000000..11dd2038f --- /dev/null +++ b/qa/tasks/cephfs/test_fscrypt.py @@ -0,0 +1,77 @@ +from logging import getLogger + +from io import StringIO +from tasks.cephfs.xfstests_dev import XFSTestsDev + + +log = getLogger(__name__) + + +class TestFscrypt(XFSTestsDev): + + def setup_xfsprogs_devs(self): + self.install_xfsprogs = True + + def require_kernel_mount(self): + from tasks.cephfs.fuse_mount import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + + # TODO: make xfstests-dev compatible with ceph-fuse. xfstests-dev + # remounts CephFS before running tests using kernel, so ceph-fuse + # mounts are never actually tested. + if isinstance(self.mount_a, FuseMount): + self.skipTest('Requires kernel client; xfstests-dev not '\ + 'compatible with ceph-fuse ATM.') + elif isinstance(self.mount_a, KernelMount): + log.info('client is kernel mounted') + + def test_fscrypt_encrypt(self): + self.require_kernel_mount() + + # XXX: check_status is set to False so that we can check for command's + # failure on our own (since this command doesn't set right error code + # and error message in some cases) and print custom log messages + # accordingly. + proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0', + './check', '-g', 'encrypt'], cwd=self.xfstests_repo_path, stdout=StringIO(), + stderr=StringIO(), timeout=900, check_status=False, omit_sudo=False, + label='running tests for encrypt from xfstests-dev') + + if proc.returncode != 0: + log.info('Command failed.') + log.info(f'Command return value: {proc.returncode}') + stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue() + log.info(f'Command stdout -\n{stdout}') + log.info(f'Command stderr -\n{stderr}') + + # Currently only the 395,396,397,421,429,435,440,580,593,595 and 598 + # of the 26 test cases will be actually ran, all the others will be + # skipped for now because of not supporting features in kernel or kceph. 
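+        # Note on the assertions below: the xfstests summary line apparently
+        # counts skipped cases as well, so a clean run is still expected to
+        # report "Passed all 26 tests" even though only the subset listed
+        # above actually executes.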
+ self.assertEqual(proc.returncode, 0) + self.assertIn('Passed all 26 tests', stdout) + + def test_fscrypt_dummy_encryption_with_quick_group(self): + self.require_kernel_mount() + + self.write_local_config('test_dummy_encryption') + + # XXX: check_status is set to False so that we can check for command's + # failure on our own (since this command doesn't set right error code + # and error message in some cases) and print custom log messages + # accordingly. This will take a long time and set the timeout to 3 hours. + proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0', + './check', '-g', 'quick', '-E', './ceph.exclude'], cwd=self.xfstests_repo_path, + stdout=StringIO(), stderr=StringIO(), timeout=10800, check_status=False, + omit_sudo=False, label='running tests for dummy_encryption from xfstests-dev') + + if proc.returncode != 0: + log.info('Command failed.') + log.info(f'Command return value: {proc.returncode}') + stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue() + log.info(f'Command stdout -\n{stdout}') + log.info(f'Command stderr -\n{stderr}') + + # Currently, many test cases will be skipped due to unsupported features, + # but still will be marked as successful. + self.assertEqual(proc.returncode, 0) + self.assertIn('Passed all ', stdout) diff --git a/qa/tasks/cephfs/test_fstop.py b/qa/tasks/cephfs/test_fstop.py new file mode 100644 index 000000000..ed76eaac2 --- /dev/null +++ b/qa/tasks/cephfs/test_fstop.py @@ -0,0 +1,114 @@ +import logging +import json + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +class TestFSTop(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + def setUp(self): + super(TestFSTop, self).setUp() + self._enable_mgr_stats_plugin() + + def tearDown(self): + self._disable_mgr_stats_plugin() + super(TestFSTop, self).tearDown() + + def _enable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + + def _disable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + + def _fstop_dump(self, *args): + return self.mount_a.run_shell(['cephfs-top', + '--id=admin', + *args]).stdout.getvalue() + + def _get_metrics(self, verifier_callback, trials, *args): + metrics = None + done = False + with safe_while(sleep=1, tries=trials, action='wait for metrics') as proceed: + while proceed(): + metrics = json.loads(self._fstop_dump(*args)) + done = verifier_callback(metrics) + if done: + break + return done, metrics + + # TESTS + def test_fstop_non_existent_cluster(self): + try: + self.mount_a.run_shell(['cephfs-top', + '--cluster=hpec', + '--id=admin', + '--selftest']) + except CommandFailedError: + pass + else: + raise RuntimeError('expected cephfs-top command to fail.') + + def test_fstop(self): + try: + self.mount_a.run_shell(['cephfs-top', + '--id=admin', + '--selftest']) + except CommandFailedError: + raise RuntimeError('cephfs-top --selftest failed') + + def test_dump(self): + """ + Tests 'cephfs-top --dump' output is valid + """ + def verify_fstop_metrics(metrics): + clients = metrics.get(self.fs.name, {}) + if str(self.mount_a.get_global_id()) in clients and \ + str(self.mount_b.get_global_id()) in clients: + return True + return False + + # validate + valid, metrics = self._get_metrics(verify_fstop_metrics, 30, '--dump') + log.debug("metrics={0}".format(metrics)) + 
self.assertTrue(valid) + + def test_dumpfs(self): + """ + Tests 'cephfs-top --dumpfs' output is valid + """ + newfs_name = "cephfs_b" + + def verify_fstop_metrics(metrics): + clients = metrics.get(newfs_name, {}) + if self.fs.name not in metrics and \ + str(self.mount_b.get_global_id()) in clients: + return True + return False + + # umount mount_b, mount another filesystem on it and use --dumpfs filter + self.mount_b.umount_wait() + + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", "enable_multiple", "true", + "--yes-i-really-mean-it") + + # create a new filesystem + fs_b = self.mds_cluster.newfs(name=newfs_name) + + # mount cephfs_b on mount_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + + # validate + valid, metrics = self._get_metrics(verify_fstop_metrics, 30, + '--dumpfs={}'.format(newfs_name)) + log.debug("metrics={0}".format(metrics)) + + # restore mount_b + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.fs.name) + + self.assertTrue(valid) diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py new file mode 100644 index 000000000..2b3a7d5f9 --- /dev/null +++ b/qa/tasks/cephfs/test_full.py @@ -0,0 +1,398 @@ +import json +import logging +import os +from textwrap import dedent +from typing import Optional +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase + + +log = logging.getLogger(__name__) + + +class FullnessTestCase(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + # Subclasses define whether they're filling whole cluster or just data pool + data_only = False + + # Subclasses define how many bytes should be written to achieve fullness + pool_capacity: Optional[int] = None + fill_mb = None + + def is_full(self): + return self.fs.is_full() + + def setUp(self): + CephFSTestCase.setUp(self) + + mds_status = self.fs.rank_asok(["status"]) + + # Capture the initial OSD map epoch for later use + self.initial_osd_epoch = mds_status['osdmap_epoch_barrier'] + + def test_barrier(self): + """ + That when an OSD epoch barrier is set on an MDS, subsequently + issued capabilities cause clients to update their OSD map to that + epoch. + """ + + # script that sync up client with MDS OSD map barrier. The barrier should + # be updated by cap flush ack message. + pyscript = dedent(""" + import os + fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0O600) + os.fchmod(fd, 0O666) + os.fsync(fd) + os.close(fd) + """) + + # Sync up client with initial MDS OSD map barrier. + path = os.path.join(self.mount_a.mountpoint, "foo") + self.mount_a.run_python(pyscript.format(path=path)) + + # Grab mounts' initial OSD epochs: later we will check that + # it hasn't advanced beyond this point. 
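+        # get_osd_epoch() is assumed here to return an (epoch, barrier) pair
+        # as seen from the client's side; the barrier is the epoch the MDS
+        # has told the client it must reach before using newly issued caps.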
+ mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch() + + # Freshly mounted at start of test, should be up to date with OSD map + self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch) + + # Set and unset a flag to cause OSD epoch to increment + self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") + self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") + + out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() + new_epoch = json.loads(out)['epoch'] + self.assertNotEqual(self.initial_osd_epoch, new_epoch) + + # Do a metadata operation on clients, witness that they end up with + # the old OSD map from startup time (nothing has prompted client + # to update its map) + path = os.path.join(self.mount_a.mountpoint, "foo") + self.mount_a.run_python(pyscript.format(path=path)) + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_epoch, mount_a_initial_epoch) + self.assertEqual(mount_a_barrier, mount_a_initial_barrier) + + # Set a barrier on the MDS + self.fs.rank_asok(["osdmap", "barrier", new_epoch.__str__()]) + + # Sync up client with new MDS OSD map barrier + path = os.path.join(self.mount_a.mountpoint, "baz") + self.mount_a.run_python(pyscript.format(path=path)) + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_barrier, new_epoch) + + # Some time passes here because the metadata part of the operation + # completes immediately, while the resulting OSD map update happens + # asynchronously (it's an Objecter::_maybe_request_map) as a result + # of seeing the new epoch barrier. + self.wait_until_true( + lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch, + timeout=30) + + def _data_pool_name(self): + data_pool_names = self.fs.get_data_pool_names() + if len(data_pool_names) > 1: + raise RuntimeError("This test can't handle multiple data pools") + else: + return data_pool_names[0] + + def _test_full(self, easy_case): + """ + - That a client trying to write data to a file is prevented + from doing so with an -EFULL result + - That they are also prevented from creating new files by the MDS. + - That they may delete another file to get the system healthy again + + :param easy_case: if true, delete a successfully written file to + free up space. else, delete the file that experienced + the failed write. + """ + + osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd')) + + log.info("Writing {0}MB should fill this cluster".format(self.fill_mb)) + + # Fill up the cluster. 
This dd may or may not fail, as it depends on + # how soon the cluster recognises its own fullness + self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2) + try: + self.mount_a.write_n_mb("large_file_b", (self.fill_mb * 1.1) // 2) + except CommandFailedError: + log.info("Writing file B failed (full status happened already)") + assert self.is_full() + else: + log.info("Writing file B succeeded (full status will happen soon)") + self.wait_until_true(lambda: self.is_full(), + timeout=osd_mon_report_interval * 120) + + # Attempting to write more data should give me ENOSPC + with self.assertRaises(CommandFailedError) as ar: + self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2) + self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space" + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the policy of rejecting non-deletion metadata operations + # while in the full state. + osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + if not self.data_only: + with self.assertRaises(CommandFailedError): + self.mount_a.write_n_mb("small_file_1", 0) + + # Clear out some space + if easy_case: + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + else: + # In the hard case it is the file that filled the system. + # Before the new #7317 (ENOSPC, epoch barrier) changes, this + # would fail because the last objects written would be + # stuck in the client cache as objecter operations. + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + + # Here we are waiting for two things to happen: + # * The MDS to purge the stray folder and execute object deletions + # * The OSDs to inform the mon that they are no longer full + self.wait_until_true(lambda: not self.is_full(), + timeout=osd_mon_report_interval * 120) + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the free space policy + osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + # Now I should be able to write again + self.mount_a.write_n_mb("large_file", 50, seek=0) + + # Ensure that the MDS keeps its OSD epoch barrier across a restart + + def test_full_different_file(self): + self._test_full(True) + + def test_full_same_file(self): + self._test_full(False) + + def _remote_write_test(self, template): + """ + Run some remote python in a way that's useful for + testing free space behaviour (see test_* methods using this) + """ + file_path = os.path.join(self.mount_a.mountpoint, "full_test_file") + + # Enough to trip the full flag + osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd')) + mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon")) + + # Sufficient data to cause RADOS cluster to go 'full' + log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb)) + + # Long enough for RADOS cluster to notice it is full and set flag on mons + # (report_interval for mon to learn PG stats, tick interval for it to update OSD map, + # factor of 1.5 for I/O + network latency in committing OSD map and 
distributing it + # to the OSDs) + full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5 + + # Configs for this test should bring this setting down in order to + # run reasonably quickly + if osd_mon_report_interval > 10: + log.warning("This test may run rather slowly unless you decrease" + "osd_mon_report_interval (5 is a good setting)!") + + # set the object_size to 1MB to make the objects destributed more evenly + # among the OSDs to fix Tracker#45434 + file_layout = "stripe_unit=1048576 stripe_count=1 object_size=1048576" + self.mount_a.run_python(template.format( + fill_mb=self.fill_mb, + file_path=file_path, + file_layout=file_layout, + full_wait=full_wait, + is_fuse=isinstance(self.mount_a, FuseMount) + )) + + def test_full_fclose(self): + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + print("writing some data through which we expect to succeed") + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}') + bytes += os.write(f, b'a' * 512 * 1024) + os.fsync(f) + print("fsync'ed data successfully, will now attempt to fill fs") + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, int({fill_mb} * 0.9)): + bytes += os.write(f, b'x' * 1024 * 1024) + print("wrote {{0}} bytes via buffered write, may repeat".format(bytes)) + print("done writing {{0}} bytes".format(bytes)) + + # OK, now we should sneak in under the full condition + # due to the time it takes the OSDs to report to the + # mons, and get a successful fsync on our full-making data + os.fsync(f) + print("successfully fsync'ed prior to getting full state reported") + + # buffered write, add more dirty data to the buffer + print("starting buffered write") + try: + for n in range(0, int({fill_mb} * 0.2)): + bytes += os.write(f, b'x' * 1024 * 1024) + print("sleeping a bit as we've exceeded 90% of our expected full ratio") + time.sleep({full_wait}) + except OSError: + pass; + + print("wrote, now waiting 30s and then doing a close we expect to fail") + + # Wait long enough for a background flush that should fail + time.sleep(30) + + if {is_fuse}: + # ...and check that the failed background flush is reflected in fclose + try: + os.close(f) + except OSError: + print("close() returned an error as expected") + else: + raise RuntimeError("close() failed to raise error") + else: + # The kernel cephfs client does not raise errors on fclose + os.close(f) + + os.unlink("{file_path}") + """) + self._remote_write_test(remote_script) + + def test_full_fsync(self): + """ + That when the full flag is encountered during asynchronous + flushes, such that an fwrite() succeeds but an fsync/fclose() + should return the ENOSPC error. 
+ """ + + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + print("writing some data through which we expect to succeed") + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}') + bytes += os.write(f, b'a' * 4096) + os.fsync(f) + print("fsync'ed data successfully, will now attempt to fill fs") + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, int({fill_mb} * 1.1)): + try: + bytes += os.write(f, b'x' * 1024 * 1024) + print("wrote bytes via buffered write, moving on to fsync") + except OSError as e: + if {is_fuse}: + print("Unexpected error %s from write() instead of fsync()" % e) + raise + else: + print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0))) + full = True + break + + try: + os.fsync(f) + print("fsync'ed successfully") + except OSError as e: + print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0))) + full = True + break + else: + print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0))) + + if n > {fill_mb} * 0.9: + # Be cautious in the last region where we expect to hit + # the full condition, so that we don't overshoot too dramatically + print("sleeping a bit as we've exceeded 90% of our expected full ratio") + time.sleep({full_wait}) + + if not full: + raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes) + + # close() should not raise an error because we already caught it in + # fsync. There shouldn't have been any more writeback errors + # since then because all IOs got cancelled on the full flag. + print("calling close") + os.close(f) + print("close() did not raise error") + + os.unlink("{file_path}") + """) + + self._remote_write_test(remote_script) + + +class TestQuotaFull(FullnessTestCase): + """ + Test per-pool fullness, which indicates quota limits exceeded + """ + pool_capacity = 1024 * 1024 * 32 # arbitrary low-ish limit + fill_mb = pool_capacity // (1024 * 1024) # type: ignore + + # We are only testing quota handling on the data pool, not the metadata + # pool. + data_only = True + + def setUp(self): + super(TestQuotaFull, self).setUp() + + pool_name = self.fs.get_data_pool_name() + self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name, + "max_bytes", "{0}".format(self.pool_capacity)) + + +class TestClusterFull(FullnessTestCase): + """ + Test data pool fullness, which indicates that an OSD has become too full + """ + pool_capacity = None + REQUIRE_MEMSTORE = True + + def setUp(self): + super(TestClusterFull, self).setUp() + + if self.pool_capacity is None: + TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail'] + TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024)) + +# Hide the parent class so that unittest.loader doesn't try to run it. 
+del globals()['FullnessTestCase'] diff --git a/qa/tasks/cephfs/test_journal_migration.py b/qa/tasks/cephfs/test_journal_migration.py new file mode 100644 index 000000000..67b514c22 --- /dev/null +++ b/qa/tasks/cephfs/test_journal_migration.py @@ -0,0 +1,100 @@ + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.workunit import task as workunit + +JOURNAL_FORMAT_LEGACY = 0 +JOURNAL_FORMAT_RESILIENT = 1 + + +class TestJournalMigration(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_journal_migration(self): + old_journal_version = JOURNAL_FORMAT_LEGACY + new_journal_version = JOURNAL_FORMAT_RESILIENT + + self.mount_a.umount_wait() + self.fs.mds_stop() + + # Create a filesystem using the older journal format. + self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version) + self.fs.mds_restart() + self.fs.recreate() + + # Enable standby replay, to cover the bug case #8811 where + # a standby replay might mistakenly end up trying to rewrite + # the journal at the same time as an active daemon. + self.fs.set_allow_standby_replay(True) + + status = self.fs.wait_for_daemons() + + self.assertTrue(self.fs.get_replay(status=status) is not None) + + # Do some client work so that the log is populated with something. + with self.mount_a.mounted_wait(): + self.mount_a.create_files() + self.mount_a.check_files() # sanity, this should always pass + + # Run a more substantial workunit so that the length of the log to be + # coverted is going span at least a few segments + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"], + }, + "timeout": "3h" + }) + + # Modify the ceph.conf to ask the MDS to use the new journal format. + self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version) + + # Restart the MDS. + self.fs.mds_fail_restart() + + # This ensures that all daemons come up into a valid state + status = self.fs.wait_for_daemons() + + # Check that files created in the initial client workload are still visible + # in a client mount. + with self.mount_a.mounted_wait(): + self.mount_a.check_files() + + # Verify that the journal really has been rewritten. 
+        journal_version = self.fs.get_journal_version()
+        if journal_version != new_journal_version:
+            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
+                new_journal_version, journal_version
+            ))
+
+        # Verify that cephfs-journal-tool can now read the rewritten journal
+        inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
+        if not inspect_out.endswith(": OK"):
+            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
+                inspect_out
+            ))
+
+        self.fs.journal_tool(["event", "get", "json",
+                              "--path", "/tmp/journal.json"], 0)
+        p = self.fs.tool_remote.sh([
+            "python3",
+            "-c",
+            "import json; print(len(json.load(open('/tmp/journal.json'))))"
+        ])
+        event_count = int(p.strip())
+        if event_count < 1000:
+            # Approximate value of "lots", expected from having run fsstress
+            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
+
+        # Do some client work to check that writing the log is still working
+        with self.mount_a.mounted_wait():
+            workunit(self.ctx, {
+                'clients': {
+                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
+                },
+                "timeout": "3h"
+            })
+
+        # Check that both an active and a standby replay are still up
+        status = self.fs.status()
+        self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
+        self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py
new file mode 100644
index 000000000..c5769784d
--- /dev/null
+++ b/qa/tasks/cephfs/test_journal_repair.py
@@ -0,0 +1,405 @@
+
+"""
+Test our tools for recovering the content of damaged journals
+"""
+
+import json
+import logging
+from textwrap import dedent
+import time
+
+from teuthology.exceptions import CommandFailedError, ConnectionLostError
+from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+from tasks.workunit import task as workunit
+
+log = logging.getLogger(__name__)
+
+
+class TestJournalRepair(CephFSTestCase):
+    MDSS_REQUIRED = 2
+
+    def test_inject_to_empty(self):
+        """
+        That when some dentries are in the journal but nothing is in
+        the backing store, we correctly populate the backing store
+        from the journalled dentries.
+ """ + + # Inject metadata operations + self.mount_a.run_shell(["touch", "rootfile"]) + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.run_shell(["touch", "subdir/subdirfile"]) + # There are several different paths for handling hardlinks, depending + # on whether an existing dentry (being overwritten) is also a hardlink + self.mount_a.run_shell(["mkdir", "linkdir"]) + + # Test inode -> remote transition for a dentry + self.mount_a.run_shell(["touch", "linkdir/link0"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link0"]) + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"]) + + # Test nothing -> remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"]) + + # Test remote -> inode transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link2"]) + self.mount_a.run_shell(["touch", "linkdir/link2"]) + + # Test remote -> diff remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link3"]) + self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"]) + + # Test an empty directory + self.mount_a.run_shell(["mkdir", "subdir/subsubdir"]) + self.mount_a.run_shell(["sync"]) + + # Before we unmount, make a note of the inode numbers, later we will + # check that they match what we recover from the journal + rootfile_ino = self.mount_a.path_to_ino("rootfile") + subdir_ino = self.mount_a.path_to_ino("subdir") + linkdir_ino = self.mount_a.path_to_ino("linkdir") + subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile") + subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir") + + self.mount_a.umount_wait() + + # Stop the MDS + self.fs.fail() + + # Now, the journal should contain the operations, but the backing + # store shouldn't + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(subdir_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute the dentry recovery, this should populate the backing store + self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0) + + # Dentries in ROOT_INO are present + self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head'])) + self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head']) + self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)), + sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head'])) + + # Now check the MDS can read what we wrote: truncate the journal + # and start the mds. + self.fs.journal_tool(['journal', 'reset'], 0) + self.fs.set_joinable() + self.fs.wait_for_daemons() + + # List files + self.mount_a.mount_wait() + + # First ls -R to populate MDCache, such that hardlinks will + # resolve properly (recover_dentries does not create backtraces, + # so ordinarily hardlinks to inodes that happen not to have backtraces + # will be invisible in readdir). 
+ # FIXME: hook in forward scrub here to regenerate backtraces + proc = self.mount_a.run_shell(['ls', '-R']) + self.mount_a.umount_wait() # remount to clear client cache before our second ls + self.mount_a.mount_wait() + + proc = self.mount_a.run_shell(['ls', '-R']) + self.assertEqual(proc.stdout.getvalue().strip(), + dedent(""" + .: + linkdir + rootfile + subdir + + ./linkdir: + link0 + link1 + link2 + link3 + + ./subdir: + subdirfile + subsubdir + + ./subdir/subsubdir: + """).strip()) + + # Check the correct inos were preserved by path + self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile")) + self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir")) + self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile")) + self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir")) + + # Check that the hard link handling came out correctly + self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino) + self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino) + + # Create a new file, ensure it is not issued the same ino as one of the + # recovered ones + self.mount_a.run_shell(["touch", "afterwards"]) + new_ino = self.mount_a.path_to_ino("afterwards") + self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino]) + + # Check that we can do metadata ops in the recovered directory + self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"]) + + @for_teuthology # 308s + def test_reset(self): + """ + That after forcibly modifying the backing store, we can get back into + a good state by resetting the MDSMap. + + The scenario is that we have two active MDSs, and we lose the journals. Once + we have completely lost confidence in the integrity of the metadata, we want to + return the system to a single-MDS state to go into a scrub to recover what we + can. + """ + + # Set max_mds to 2 + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + rank0_gid = self.fs.get_rank(rank=0, status=status)['gid'] + self.fs.set_joinable(False) # no unintended failover + + # Create a dir on each rank + self.mount_a.run_shell_payload("mkdir {alpha,bravo} && touch {alpha,bravo}/file") + self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0") + self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1") + + # Ensure the pinning has taken effect and the /bravo dir is now + # migrated to rank 1. + self._wait_subtrees([('/bravo', 1), ('/alpha', 0)], rank=0, status=status) + + # Do some IO (this should be split across ranks according to + # the rank-pinned dirs) + self.mount_a.create_n_files("alpha/file", 1000) + self.mount_a.create_n_files("bravo/file", 1000) + + # Flush the journals so that we have some backing store data + # belonging to one MDS, and some to the other MDS. 
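+        # (The flush is presumably what pushes each rank's dirty metadata
+        # out to the metadata pool, so both /alpha and /bravo have some
+        # persistent backing-store objects before rank 1 is broken below.)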
+ self.fs.rank_asok(["flush", "journal"], rank=0) + self.fs.rank_asok(["flush", "journal"], rank=1) + + # Stop (hard) the second MDS daemon + self.fs.rank_fail(rank=1) + + # Wipe out the tables for MDS rank 1 so that it is broken and can't start + # (this is the simulated failure that we will demonstrate that the disaster + # recovery tools can get us back from) + self.fs.erase_metadata_objects(prefix="mds1_") + + # Try to access files from the client + blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False) + + # Check that this "ls -R" blocked rather than completing: indicates + # it got stuck trying to access subtrees which were on the now-dead MDS. + log.info("Sleeping to check ls is blocked...") + time.sleep(60) + self.assertFalse(blocked_ls.finished) + + # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1 + # is not coming back. Kill it. + log.info("Killing mount, it's blocked on the MDS we killed") + self.mount_a.kill() + self.mount_a.kill_cleanup() + try: + # Now that the mount is dead, the ls -R should error out. + blocked_ls.wait() + except (CommandFailedError, ConnectionLostError): + # The ConnectionLostError case is for kernel client, where + # killing the mount also means killing the node. + pass + + # See that the second MDS will crash when it starts and tries to + # acquire rank 1 + self.fs.set_joinable(True) + + # The daemon taking the damaged rank should start starting, then + # restart back into standby after asking the mon to mark the rank + # damaged. + def is_marked_damaged(): + mds_map = self.fs.get_mds_map() + return 1 in mds_map['damaged'] + + self.wait_until_true(is_marked_damaged, 60) + self.assertEqual(rank0_gid, self.fs.get_rank(rank=0)['gid']) + + # Now give up and go through a disaster recovery procedure + self.fs.fail() + # Invoke recover_dentries quietly, because otherwise log spews millions of lines + self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True) + self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True) + self.fs.table_tool(["0", "reset", "session"]) + self.fs.journal_tool(["journal", "reset"], 0) + self.fs.erase_mds_objects(1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, + '--yes-i-really-mean-it') + + # Bring an MDS back online, mount a client, and see that we can walk the full + # filesystem tree again + self.fs.set_joinable(True) # redundant with `fs reset` + status = self.fs.wait_for_daemons() + self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1) + self.mount_a.mount_wait() + self.mount_a.run_shell(["ls", "-R"], wait=True) + + def test_table_tool(self): + self.mount_a.run_shell(["touch", "foo"]) + self.fs.rank_asok(["flush", "journal"]) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Inode table should always be the same because initial state + # and choice of inode are deterministic. 
+ # Should see one inode consumed + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": { + "data": { + "version": 2, + "inotable": { + "projected_free": [ + {"start": 1099511628777, + "len": 1099511626775}], + "free": [ + {"start": 1099511628777, + "len": 1099511626775}]}}, + "result": 0}} + + ) + + # Should see one session + session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["sessions"]), 1) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see no snaps + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "snap"])), + {"version": 1, + "snapserver": {"last_snap": 1, + "last_created": 1, + "last_destroyed": 1, + "pending_noop": [], + "snaps": [], + "need_to_purge": {}, + "pending_update": [], + "pending_destroy": []}, + "result": 0} + ) + + # Reset everything + for table in ["session", "inode", "snap"]: + self.fs.table_tool(["all", "reset", table]) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Should see 0 sessions + session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["sessions"]), 0) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see entire inode range now marked free + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": {"data": {"version": 1, + "inotable": {"projected_free": [ + {"start": 1099511627776, + "len": 1099511627776}], + "free": [ + {"start": 1099511627776, + "len": 1099511627776}]}}, + "result": 0}} + ) + + # Should see no snaps + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "snap"])), + {"version": 1, + "snapserver": {"last_snap": 1, + "last_created": 1, + "last_destroyed": 1, + "pending_noop": [], + "snaps": [], + "need_to_purge": {}, + "pending_update": [], + "pending_destroy": []}, + "result": 0} + ) + + def test_table_tool_take_inos(self): + initial_range_start = 1099511627776 + initial_range_len = 1099511627776 + # Initially a completely clear range + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": {"data": {"version": 0, + "inotable": {"projected_free": [ + {"start": initial_range_start, + "len": initial_range_len}], + "free": [ + {"start": initial_range_start, + "len": initial_range_len}]}}, + "result": 0}} + ) + + # Remove some + self.assertEqual( + json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])), + {"0": {"data": {"version": 1, + "inotable": {"projected_free": [ + {"start": initial_range_start + 101, + "len": initial_range_len - 101}], + "free": [ + {"start": initial_range_start + 101, + "len": initial_range_len - 101}]}}, + "result": 0}} + ) + + @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth + def test_journal_smoke(self): + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): [ + "fs/misc/trivial_sync.sh"], + }, + "timeout": "1h" + }) + + for mount in self.mounts: + mount.umount_wait() + + self.fs.fail() + + # journal tool smoke + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): [ + "suites/cephfs_journal_tool_smoke.sh"], + }, + "timeout": "1h" + }) + + + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # trivial sync moutn a + 
workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): [ + "fs/misc/trivial_sync.sh"], + }, + "timeout": "1h" + }) + diff --git a/qa/tasks/cephfs/test_mantle.py b/qa/tasks/cephfs/test_mantle.py new file mode 100644 index 000000000..746c2ffe3 --- /dev/null +++ b/qa/tasks/cephfs/test_mantle.py @@ -0,0 +1,111 @@ +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import json +import logging + +log = logging.getLogger(__name__) +failure = "using old balancer; mantle failed for balancer=" +success = "mantle balancer version changed: " + +class TestMantle(CephFSTestCase): + def start_mantle(self): + self.wait_for_health_clear(timeout=30) + self.fs.set_max_mds(2) + self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + for m in self.fs.get_active_names(): + self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m) + + def push_balancer(self, obj, lua_code, expect): + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) + self.fs.radosm(["put", obj, "-"], stdin=StringIO(lua_code)) + with self.assert_cluster_log(failure + obj + " " + expect): + log.info("run a " + obj + " balancer that expects=" + expect) + + def test_version_empty(self): + self.start_mantle() + expect = " : (2) No such file or directory" + + ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer') + assert(ret == 22) # EINVAL + + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") + with self.assert_cluster_log(failure + " " + expect): pass + + def test_version_not_in_rados(self): + self.start_mantle() + expect = failure + "ghost.lua : (2) No such file or directory" + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") + with self.assert_cluster_log(expect): pass + + def test_balancer_invalid(self): + self.start_mantle() + expect = ": (22) Invalid argument" + + lua_code = "this is invalid lua code!" 
+ self.push_balancer("invalid.lua", lua_code, expect) + + lua_code = "BAL_LOG()" + self.push_balancer("invalid_log.lua", lua_code, expect) + + lua_code = "BAL_LOG(0)" + self.push_balancer("invalid_log_again.lua", lua_code, expect) + + def test_balancer_valid(self): + self.start_mantle() + lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}" + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + self.fs.radosm(["put", "valid.lua", "-"], stdin=StringIO(lua_code)) + with self.assert_cluster_log(success + "valid.lua"): + log.info("run a valid.lua balancer") + + def test_return_invalid(self): + self.start_mantle() + expect = ": (22) Invalid argument" + + lua_code = "return \"hello\"" + self.push_balancer("string.lua", lua_code, expect) + + lua_code = "return 3" + self.push_balancer("number.lua", lua_code, expect) + + lua_code = "return {}" + self.push_balancer("dict_empty.lua", lua_code, expect) + + lua_code = "return {\"this\", \"is\", \"a\", \"test\"}" + self.push_balancer("dict_of_strings.lua", lua_code, expect) + + lua_code = "return {3, \"test\"}" + self.push_balancer("dict_of_mixed.lua", lua_code, expect) + + lua_code = "return {3}" + self.push_balancer("not_enough_numbers.lua", lua_code, expect) + + lua_code = "return {3, 4, 5, 6, 7, 8, 9}" + self.push_balancer("too_many_numbers.lua", lua_code, expect) + + def test_dead_osd(self): + self.start_mantle() + expect = " : (110) Connection timed out" + + # kill the OSDs so that the balancer pull from RADOS times out + osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty')) + for i in range(0, len(osd_map['osds'])): + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i)) + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i)) + + # trigger a pull from RADOS + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + + # make the timeout a little longer since dead OSDs spam ceph -w + with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30): + log.info("run a balancer that should timeout") + + # cleanup + for i in range(0, len(osd_map['osds'])): + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i)) diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py new file mode 100644 index 000000000..ad877f622 --- /dev/null +++ b/qa/tasks/cephfs/test_mds_metrics.py @@ -0,0 +1,643 @@ +import os +import json +import time +import random +import logging +import errno + +from teuthology.contextutil import safe_while, MaxWhileTries +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestMDSMetrics(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 3 + + TEST_DIR_PERFIX = "test_mds_metrics" + + def setUp(self): + super(TestMDSMetrics, self).setUp() + self._start_with_single_active_mds() + self._enable_mgr_stats_plugin() + + def tearDown(self): + self._disable_mgr_stats_plugin() + super(TestMDSMetrics, self).tearDown() + + def _start_with_single_active_mds(self): + curr_max_mds = self.fs.get_var('max_mds') + if curr_max_mds > 1: + self.fs.shrink(1) + + def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=[], mul_fs=[]): + def verify_metrics_cbk(metrics): + mds_metrics = metrics['metrics'] + if not len(mds_metrics) == active_mds_count + 1: # n active mdss + delayed set + return False + fs_status = self.fs.status() + nonlocal ranks, mul_fs 
+ if not ranks: + if not mul_fs: + mul_fs = [self.fs.id] + for filesystem in mul_fs: + ranks = set([info['rank'] for info in fs_status.get_ranks(filesystem)]) + for rank in ranks: + r = mds_metrics.get("mds.{}".format(rank), None) + if not r or not len(mds_metrics['delayed_ranks']) == 0: + return False + for item in mul_fs: + key = fs_status.get_fsmap(item)['mdsmap']['fs_name'] + global_metrics = metrics['global_metrics'].get(key, {}) + client_metadata = metrics['client_metadata'].get(key, {}) + if not len(global_metrics) >= client_count or not len(client_metadata) >= client_count: + return False + return True + return verify_metrics_cbk + + def _fs_perf_stats(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", *args) + + def _enable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + + def _disable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + + def _spread_directory_on_all_ranks(self, fscid): + fs_status = self.fs.status() + ranks = set([info['rank'] for info in fs_status.get_ranks(fscid)]) + # create a per-rank pinned directory + for rank in ranks: + dirname = "{0}_{1}".format(TestMDSMetrics.TEST_DIR_PERFIX, rank) + self.mount_a.run_shell(["mkdir", dirname]) + self.mount_a.setfattr(dirname, "ceph.dir.pin", str(rank)) + log.info("pinning directory {0} to rank {1}".format(dirname, rank)) + for i in range(16): + filename = "{0}.{1}".format("test", i) + self.mount_a.write_n_mb(os.path.join(dirname, filename), 1) + + def _do_spread_io(self, fscid): + # spread readdir I/O + self.mount_b.run_shell(["find", "."]) + + def _do_spread_io_all_clients(self, fscid): + # spread readdir I/O + self.mount_a.run_shell(["find", "."]) + self.mount_b.run_shell(["find", "."]) + + def _cleanup_test_dirs(self): + dirnames = self.mount_a.run_shell(["ls"]).stdout.getvalue() + for dirname in dirnames.split("\n"): + if dirname.startswith(TestMDSMetrics.TEST_DIR_PERFIX): + log.info("cleaning directory {}".format(dirname)) + self.mount_a.run_shell(["rm", "-rf", dirname]) + + def _get_metrics(self, verifier_callback, trials, *args): + metrics = None + done = False + with safe_while(sleep=1, tries=trials, action='wait for metrics') as proceed: + while proceed(): + metrics = json.loads(self._fs_perf_stats(*args)) + done = verifier_callback(metrics) + if done: + break + return done, metrics + + def _setup_fs(self, fs_name): + fs_a = self.mds_cluster.newfs(name=fs_name) + + self.mds_cluster.mds_restart() + + # Wait for filesystem to go healthy + fs_a.wait_for_daemons() + + # Reconfigure client auth caps + for mount in self.mounts: + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', f"client.{mount.client_id}", + 'mds', 'allow', + 'mon', 'allow r', + 'osd', f'allow rw pool={fs_a.get_data_pool_name()}') + + return fs_a + + # basic check to verify if we get back metrics from each active mds rank + + def test_metrics_from_rank(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_metrics_post_client_disconnection(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + self.mount_a.umount_wait() + + valid, metrics = 
self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED - 1), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_metrics_mds_grow(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics(self.verify_mds_metrics( + active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED) , 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # cleanup test directories + self._cleanup_test_dirs() + + def test_metrics_mds_grow_and_shrink(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # shrink mds cluster + self.fs.shrink(1) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # cleanup test directories + self._cleanup_test_dirs() + + def test_delayed_metrics(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # do not give this mds any chance + delayed_rank = 1 + mds_id_rank0 = self.fs.get_rank(rank=0)['name'] + mds_id_rank1 = self.fs.get_rank(rank=1)['name'] + + self.fs.set_inter_mds_block(True, mds_id_rank0, mds_id_rank1) + + def verify_delayed_metrics(metrics): + mds_metrics = metrics['metrics'] + r = mds_metrics.get("mds.{}".format(delayed_rank), None) + if not r or not delayed_rank in mds_metrics['delayed_ranks']: + return False + return True + # validate + valid, metrics = self._get_metrics(verify_delayed_metrics, 30) + log.debug("metrics={0}".format(metrics)) + + self.assertTrue(valid) + self.fs.set_inter_mds_block(False, mds_id_rank0, mds_id_rank1) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + 
log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # cleanup test directories + self._cleanup_test_dirs() + + def test_query_mds_filter(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + filtered_mds = 1 + def verify_filtered_mds_rank_metrics(metrics): + # checks if the metrics has only client_metadata and + # global_metrics filtered using --mds_rank=1 + global_metrics = metrics['global_metrics'].get(self.fs.name, {}) + client_metadata = metrics['client_metadata'].get(self.fs.name, {}) + mds_metrics = metrics['metrics'] + if len(mds_metrics) != 2 or f"mds.{filtered_mds}" not in mds_metrics: + return False + if len(global_metrics) > TestMDSMetrics.CLIENTS_REQUIRED or\ + len(client_metadata) > TestMDSMetrics.CLIENTS_REQUIRED: + return False + if len(set(global_metrics) - set(mds_metrics[f"mds.{filtered_mds}"])) or\ + len(set(client_metadata) - set(mds_metrics[f"mds.{filtered_mds}"])): + return False + return True + # initiate a new query with `--mds_rank` filter and validate if + # we get metrics *only* from that mds. + valid, metrics = self._get_metrics(verify_filtered_mds_rank_metrics, 30, + f'--mds_rank={filtered_mds}') + log.debug(f"metrics={metrics}") + self.assertTrue(valid, "Incorrect 'ceph fs perf stats' output" + f" with filter '--mds_rank={filtered_mds}'") + + def test_query_client_filter(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + mds_metrics = metrics['metrics'] + # pick an random client + client = random.choice(list(mds_metrics['mds.0'].keys())) + # could have used regex to extract client id + client_id = (client.split(' ')[0]).split('.')[-1] + + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1), 30, '--client_id={}'.format(client_id)) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_query_client_ip_filter(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + client_matadata = metrics['client_metadata'][self.fs.name] + # pick an random client + client = random.choice(list(client_matadata.keys())) + # get IP of client to use in filter + client_ip = client_matadata[client]['IP'] + + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1), 30, '--client_ip={}'.format(client_ip)) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # verify IP from output with filter IP + for i in metrics['client_metadata'][self.fs.name]: + self.assertEqual(client_ip, metrics['client_metadata'][self.fs.name][i]['IP']) + + def test_query_mds_and_client_filter(self): + # validate + valid, metrics = self._get_metrics( + 
self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io_all_clients(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + mds_metrics = metrics['metrics'] + + # pick an random client + client = random.choice(list(mds_metrics['mds.1'].keys())) + # could have used regex to extract client id + client_id = (client.split(' ')[0]).split('.')[-1] + filtered_mds = 1 + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1, ranks=[filtered_mds]), + 30, '--mds_rank={}'.format(filtered_mds), '--client_id={}'.format(client_id)) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_for_invalid_mds_rank(self): + invalid_mds_rank = "1," + # try, 'fs perf stat' command with invalid mds_rank + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--mds_rank", invalid_mds_rank) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs perf stat' command to fail for invalid mds_rank") + + def test_for_invalid_client_id(self): + invalid_client_id = "abcd" + # try, 'fs perf stat' command with invalid client_id + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_id", invalid_client_id) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_id") + + def test_for_invalid_client_ip(self): + invalid_client_ip = "1.2.3" + # try, 'fs perf stat' command with invalid client_ip + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_ip", invalid_client_ip) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_ip") + + def test_perf_stats_stale_metrics(self): + """ + That `ceph fs perf stats` doesn't output stale metrics after the rank0 MDS failover + """ + # validate + valid, metrics = self._get_metrics(self.verify_mds_metrics( + active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug(f'metrics={metrics}') + self.assertTrue(valid) + + # mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their + # entries from the global_metrics. 
+ client_a_name = f'client.{self.mount_a.get_global_id()}' + client_b_name = f'client.{self.mount_b.get_global_id()}' + + global_metrics = metrics['global_metrics'] + client_a_metrics = global_metrics[self.fs.name][client_a_name] + client_b_metrics = global_metrics[self.fs.name][client_b_name] + + # fail rank0 mds + self.fs.rank_fail(rank=0) + + # Wait for rank0 up:active state + self.fs.wait_for_state('up:active', rank=0, timeout=30) + + fscid = self.fs.id + + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io_all_clients(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + try: + valid, metrics_new = self._get_metrics(self.verify_mds_metrics( + active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug(f'metrics={metrics_new}') + self.assertTrue(valid) + + client_metadata = metrics_new['client_metadata'] + client_a_metadata = client_metadata.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metadata = client_metadata.get(self.fs.name, {}).get(client_b_name, {}) + + global_metrics = metrics_new['global_metrics'] + client_a_metrics_new = global_metrics.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metrics_new = global_metrics.get(self.fs.name, {}).get(client_b_name, {}) + + # the metrics should be different for the test to succeed. + self.assertTrue(client_a_metadata and client_b_metadata and + client_a_metrics_new and client_b_metrics_new and + (client_a_metrics_new != client_a_metrics) and + (client_b_metrics_new != client_b_metrics), + "Invalid 'ceph fs perf stats' metrics after rank0 mds failover") + except MaxWhileTries: + raise RuntimeError("Failed to fetch 'ceph fs perf stats' metrics") + finally: + # cleanup test directories + self._cleanup_test_dirs() + + def test_client_metrics_and_metadata(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + self.fs.delete_all_filesystems() + + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", "--yes-i-really-mean-it") + + # creating filesystem + fs_a = self._setup_fs(fs_name="fs1") + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # creating another filesystem + fs_b = self._setup_fs(fs_name="fs2") + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + fscid_list = [fs_a.id, fs_b.id] + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=fscid_list), 30) + log.debug(f"metrics={metrics}") + self.assertTrue(valid) + + client_metadata_a = metrics['client_metadata']['fs1'] + client_metadata_b = metrics['client_metadata']['fs2'] + + for i in client_metadata_a: + if not (client_metadata_a[i]['hostname']): + raise RuntimeError("hostname of fs1 not found!") + if not (client_metadata_a[i]['valid_metrics']): + raise RuntimeError("valid_metrics of fs1 not found!") + + for i in client_metadata_b: + if not (client_metadata_b[i]['hostname']): + raise RuntimeError("hostname of fs2 not found!") + if not (client_metadata_b[i]['valid_metrics']): + raise RuntimeError("valid_metrics of fs2 not found!") + + def test_non_existing_mds_rank(self): + def verify_filtered_metrics(metrics): + # checks if the metrics 
has non empty client_metadata and global_metrics + if metrics['client_metadata'].get(self.fs.name, {})\ + or metrics['global_metrics'].get(self.fs.name, {}): + return True + return False + + try: + # validate + filter_rank = random.randint(1, 10) + valid, metrics = self._get_metrics(verify_filtered_metrics, 30, + '--mds_rank={}'.format(filter_rank)) + log.info(f'metrics={metrics}') + self.assertFalse(valid, "Fetched 'ceph fs perf stats' metrics using nonexistent MDS rank") + except MaxWhileTries: + # success + pass + + def test_perf_stats_stale_metrics_with_multiple_filesystem(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", "--yes-i-really-mean-it") + + # creating filesystem + fs_b = self._setup_fs(fs_name="fs2") + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + # creating another filesystem + fs_a = self._setup_fs(fs_name="fs1") + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30) + log.debug(f"metrics={metrics}") + self.assertTrue(valid) + + # get mounted client's entries from the global_metrics. + client_a_name = f'client.{self.mount_a.get_global_id()}' + + global_metrics = metrics['global_metrics'] + client_a_metrics = global_metrics.get("fs1", {}).get(client_a_name, {}) + + # fail active mds of fs_a + fs_a_mds = fs_a.get_active_names()[0] + self.mds_cluster.mds_fail(fs_a_mds) + fs_a.wait_for_state('up:active', rank=0, timeout=30) + + # spread directory per rank + self._spread_directory_on_all_ranks(fs_a.id) + + # spread some I/O + self._do_spread_io_all_clients(fs_a.id) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + try: + valid, metrics_new = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30) + log.debug(f'metrics={metrics_new}') + self.assertTrue(valid) + + client_metadata = metrics_new['client_metadata'] + client_a_metadata = client_metadata.get("fs1", {}).get(client_a_name, {}) + + global_metrics = metrics_new['global_metrics'] + client_a_metrics_new = global_metrics.get("fs1", {}).get(client_a_name, {}) + + # the metrics should be different for the test to succeed. 
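The staleness check here (and in test_perf_stats_stale_metrics above) boils down to comparing one client's counters before and after an MDS failover. A small sketch of that comparison, assuming both arguments are parsed outputs of `ceph fs perf stats`:

    def client_metrics_changed(before, after, fs_name, client_name):
        # 'global_metrics' is keyed by filesystem name and then by client name;
        # the check passes only if the client is still reported after the
        # failover and at least one of its counters has moved
        old = before.get("global_metrics", {}).get(fs_name, {}).get(client_name, {})
        new = after.get("global_metrics", {}).get(fs_name, {}).get(client_name, {})
        return bool(new) and new != old

With a helper like that, the assertion below amounts to client_metrics_changed(metrics, metrics_new, "fs1", client_a_name) plus the non-empty client_metadata check.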
+ self.assertTrue(client_a_metadata and client_a_metrics_new + and (client_a_metrics_new != client_a_metrics), + "Invalid 'ceph fs perf stats' metrics after" + f" rank0 mds of {fs_a.name} failover") + except MaxWhileTries: + raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics") + finally: + # cleanup test directories + self._cleanup_test_dirs() + diff --git a/qa/tasks/cephfs/test_meta_injection.py b/qa/tasks/cephfs/test_meta_injection.py new file mode 100644 index 000000000..916b30a25 --- /dev/null +++ b/qa/tasks/cephfs/test_meta_injection.py @@ -0,0 +1,38 @@ +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +class TestMetaInjection(CephFSTestCase): + def test_meta_injection(self): + conf_ori = self.fs.mds_asok(['config', 'show']) + self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', '1']) + self.mount_a.run_shell(["mkdir", "metadir"]) + self.mount_a.run_shell(["touch", "metadir/metafile1"]) + self.mount_a.run_shell(["touch", "metadir/metafile2"]) + self.fs.mds_asok(['flush', 'journal']) + dirino = self.mount_a.path_to_ino("metadir") + ino = self.mount_a.path_to_ino("metadir/metafile1") + + # export meta of ino + self.fs.meta_tool(['showm', '-i', str(ino), '-o', '/tmp/meta_out'], 0, True) + out = self.mount_a.run_shell(['grep', str(ino),'/tmp/meta_out']).stdout.getvalue().strip() + + # check the metadata of ino + self.assertNotEqual(out.find(u'"ino":'+ str(ino)), -1) + + # amend info of ino + self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj") + self.fs.meta_tool(['amend', '-i', str(ino), '--in', '/tmp/meta_out', '--yes-i-really-really-mean-it'], 0, True) + self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj_chg") + + # checkout meta_out after import it + ori_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj"]).stdout.getvalue().strip().split() + chg_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj_chg"]).stdout.getvalue().strip().split() + print(ori_mds5," ==> ", chg_mds5) + self.assertEqual(len(ori_mds5), 2) + self.assertEqual(len(chg_mds5), 2) + self.assertEqual(ori_mds5[0], chg_mds5[0]) + + self.mount_a.run_shell(["rm", "metadir", "-rf"]) + self.mount_a.run_shell(["rm", "/tmp/meta_obj"]) + self.mount_a.run_shell(["rm", "/tmp/meta_obj_chg"]) + # restore config of mds_log_max_segments + self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', conf_ori["mds_log_max_segments"]]) diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py new file mode 100644 index 000000000..c1a940e3f --- /dev/null +++ b/qa/tasks/cephfs/test_mirroring.py @@ -0,0 +1,1298 @@ +import os +import json +import errno +import logging +import random +import time + +from io import StringIO +from collections import deque + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + +class TestMirroring(CephFSTestCase): + MDSS_REQUIRED = 5 + CLIENTS_REQUIRED = 2 + REQUIRE_BACKUP_FILESYSTEM = True + + MODULE_NAME = "mirroring" + + def setUp(self): + super(TestMirroring, self).setUp() + self.primary_fs_name = self.fs.name + self.primary_fs_id = self.fs.id + self.secondary_fs_name = self.backup_fs.name + self.secondary_fs_id = self.backup_fs.id + self.enable_mirroring_module() + + def tearDown(self): + self.disable_mirroring_module() + super(TestMirroring, self).tearDown() + + def enable_mirroring_module(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", 
TestMirroring.MODULE_NAME) + + def disable_mirroring_module(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", TestMirroring.MODULE_NAME) + + def enable_mirroring(self, fs_name, fs_id): + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "enable", fs_name) + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + self.assertTrue(res['peers'] == {}) + self.assertTrue(res['snap_dirs']['dir_count'] == 0) + + def disable_mirroring(self, fs_name, fs_id): + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "disable", fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def verify_peer_added(self, fs_name, fs_id, peer_spec, remote_fs_name=None): + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + peer_uuid = self.get_peer_uuid(peer_spec) + self.assertTrue(peer_uuid in res['peers']) + client_name = res['peers'][peer_uuid]['remote']['client_name'] + cluster_name = res['peers'][peer_uuid]['remote']['cluster_name'] + self.assertTrue(peer_spec == f'{client_name}@{cluster_name}') + if remote_fs_name: + self.assertTrue(self.secondary_fs_name == res['peers'][peer_uuid]['remote']['fs_name']) + else: + self.assertTrue(self.fs_name == res['peers'][peer_uuid]['remote']['fs_name']) + + def peer_add(self, fs_name, fs_id, peer_spec, remote_fs_name=None): + if remote_fs_name: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec, remote_fs_name) + else: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec) + time.sleep(10) + self.verify_peer_added(fs_name, fs_id, peer_spec, remote_fs_name) + + def peer_remove(self, fs_name, fs_id, peer_spec): + peer_uuid = self.get_peer_uuid(peer_spec) + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", fs_name, peer_uuid) + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + self.assertTrue(res['peers'] == {} and res['snap_dirs']['dir_count'] == 0) + + def bootstrap_peer(self, fs_name, client_name, site_name): + outj = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "fs", "snapshot", "mirror", "peer_bootstrap", "create", fs_name, client_name, site_name)) + return outj['token'] + + def import_peer(self, fs_name, token): + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_bootstrap", "import", + fs_name, token) + + def add_directory(self, fs_name, fs_id, dir_name): + # get initial dir count + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + dir_count = res['snap_dirs']['dir_count'] + log.debug(f'initial dir_count={dir_count}') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", fs_name, dir_name) + + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + new_dir_count = res['snap_dirs']['dir_count'] + 
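These helpers (enable/disable mirroring, peer_add/peer_remove, add_directory and the remove_directory that follows) all validate themselves against the cephfs-mirror admin socket. The same query can be issued by hand; a sketch assuming the default socket path used later in this file, /var/run/ceph/cephfs-mirror.asok (teuthology runs override it via fs/mirror/clients/mirror.yaml), and a placeholder filesystem name and id:

    import json
    import subprocess

    ASOK = "/var/run/ceph/cephfs-mirror.asok"   # assumed default; overridden in test runs

    def mirror_status(fs_name, fs_id):
        # ask the cephfs-mirror daemon for its view of one filesystem
        out = subprocess.check_output(
            ["ceph", "--admin-daemon", ASOK,
             "fs", "mirror", "status", f"{fs_name}@{fs_id}"])
        return json.loads(out)

    # 'snap_dirs'/'dir_count' grows after 'ceph fs snapshot mirror add <fs> <path>'
    # and shrinks again after 'ceph fs snapshot mirror remove <fs> <path>'
    status = mirror_status("cephfs", 1)
    print(status["peers"], status["snap_dirs"]["dir_count"])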
log.debug(f'new dir_count={new_dir_count}') + self.assertTrue(new_dir_count > dir_count) + + def remove_directory(self, fs_name, fs_id, dir_name): + # get initial dir count + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + dir_count = res['snap_dirs']['dir_count'] + log.debug(f'initial dir_count={dir_count}') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", fs_name, dir_name) + + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + new_dir_count = res['snap_dirs']['dir_count'] + log.debug(f'new dir_count={new_dir_count}') + self.assertTrue(new_dir_count < dir_count) + + def check_peer_status(self, fs_name, fs_id, peer_spec, dir_name, expected_snap_name, + expected_snap_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['last_synced_snap']['name'] == expected_snap_name) + self.assertTrue(res[dir_name]['snaps_synced'] == expected_snap_count) + + def check_peer_status_deleted_snap(self, fs_name, fs_id, peer_spec, dir_name, + expected_delete_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['snaps_deleted'] == expected_delete_count) + + def check_peer_status_renamed_snap(self, fs_name, fs_id, peer_spec, dir_name, + expected_rename_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['snaps_renamed'] == expected_rename_count) + + def check_peer_snap_in_progress(self, fs_name, fs_id, + peer_spec, dir_name, snap_name): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue('syncing' == res[dir_name]['state']) + self.assertTrue(res[dir_name]['current_sycning_snap']['name'] == snap_name) + + def verify_snapshot(self, dir_name, snap_name): + snap_list = self.mount_b.ls(path=f'{dir_name}/.snap') + self.assertTrue(snap_name in snap_list) + + source_res = self.mount_a.dir_checksum(path=f'{dir_name}/.snap/{snap_name}', + follow_symlinks=True) + log.debug(f'source snapshot checksum {snap_name} {source_res}') + + dest_res = self.mount_b.dir_checksum(path=f'{dir_name}/.snap/{snap_name}', + follow_symlinks=True) + log.debug(f'destination snapshot checksum {snap_name} {dest_res}') + self.assertTrue(source_res == dest_res) + + def verify_failed_directory(self, fs_name, fs_id, peer_spec, dir_name): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue('failed' == res[dir_name]['state']) + + def get_peer_uuid(self, peer_spec): + status = self.fs.status() + fs_map = status.get_fsmap_byname(self.primary_fs_name) + peers = fs_map['mirror_info']['peers'] + for peer_uuid, mirror_info in peers.items(): + 
client_name = mirror_info['remote']['client_name'] + cluster_name = mirror_info['remote']['cluster_name'] + remote_peer_spec = f'{client_name}@{cluster_name}' + if peer_spec == remote_peer_spec: + return peer_uuid + return None + + def get_daemon_admin_socket(self): + """overloaded by teuthology override (fs/mirror/clients/mirror.yaml)""" + return "/var/run/ceph/cephfs-mirror.asok" + + def get_mirror_daemon_pid(self): + """pid file overloaded in fs/mirror/clients/mirror.yaml""" + return self.mount_a.run_shell(['cat', '/var/run/ceph/cephfs-mirror.pid']).stdout.getvalue().strip() + + def get_mirror_rados_addr(self, fs_name, fs_id): + """return the rados addr used by cephfs-mirror instance""" + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + return res['rados_inst'] + + def mirror_daemon_command(self, cmd_label, *args): + asok_path = self.get_daemon_admin_socket() + try: + # use mount_a's remote to execute command + p = self.mount_a.client_remote.run(args= + ['ceph', '--admin-daemon', asok_path] + list(args), + stdout=StringIO(), stderr=StringIO(), timeout=30, + check_status=True, label=cmd_label) + p.wait() + except CommandFailedError as ce: + log.warn(f'mirror daemon command with label "{cmd_label}" failed: {ce}') + raise + res = p.stdout.getvalue().strip() + log.debug(f'command returned={res}') + return json.loads(res) + + def get_mirror_daemon_status(self): + daemon_status = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "daemon", "status")) + log.debug(f'daemon_status: {daemon_status}') + # running a single mirror daemon is supported + status = daemon_status[0] + log.debug(f'status: {status}') + return status + + def test_basic_mirror_commands(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_peer_commands(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_disable_with_peer(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_matching_peer(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError('invalid errno when adding a matching remote peer') + else: + raise RuntimeError('adding a peer matching local spec should fail') + + # verify via asok -- nothing should get added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + + # and explicitly specifying the spec (via filesystem name) should fail too + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus 
!= errno.EINVAL: + raise RuntimeError('invalid errno when adding a matching remote peer') + else: + raise RuntimeError('adding a peer matching local spec should fail') + + # verify via asok -- nothing should get added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_peer_add_existing(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # adding the same peer should be idempotent + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_peer_commands_with_mirroring_disabled(self): + # try adding peer when mirroring is not enabled + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a peer') + else: + raise RuntimeError(-errno.EINVAL, 'expected peer_add to fail') + + # try removing peer + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid') + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when removing a peer') + else: + raise RuntimeError(-errno.EINVAL, 'expected peer_remove to fail') + + def test_add_directory_with_mirroring_disabled(self): + # try adding a directory when mirroring is not enabled + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1") + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + def test_directory_commands(self): + self.mount_a.run_shell(["mkdir", "d1"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + try: + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + except CommandFailedError as ce: + if ce.exitstatus not in (errno.ENOENT, errno.EINVAL): + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-deleting a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory removal to fail') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rmdir", "d1"]) + + def test_add_relative_directory_path(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, 
'./d1') + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a relative path dir') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_add_directory_path_normalization(self): + self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/d3') + def check_add_command_failure(dir_path): + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + # everything points for /d1/d2/d3 + check_add_command_failure('/d1/d2/././././././d3') + check_add_command_failure('/d1/d2/././././././d3//////') + check_add_command_failure('/d1/d2/../d2/././././d3') + check_add_command_failure('/././././d1/./././d2/./././d3//////') + check_add_command_failure('/./d1/./d2/./d3/../../../d1/d2/d3') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rm", "-rf", "d1"]) + + def test_add_ancestor_and_child_directory(self): + self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"]) + self.mount_a.run_shell(["mkdir", "-p", "d1/d4"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/') + def check_add_command_failure(dir_path): + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + # cannot add ancestors or a subtree for an existing directory + check_add_command_failure('/') + check_add_command_failure('/d1') + check_add_command_failure('/d1/d2/d3') + + # obviously, one can add a non-ancestor or non-subtree + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d4/') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rm", "-rf", "d1"]) + + def test_cephfs_mirror_blocklist(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + peers_1 = set(res['peers']) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # check if the rados addr is 
blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + # wait enough so that the mirror daemon restarts blocklisted instances + time.sleep(40) + rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # and we should get a new rados instance + self.assertTrue(rados_inst != rados_inst_new) + + # along with peers that were added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + peers_2 = set(res['peers']) + self.assertTrue(peers_1, peers_2) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_stats(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.create_n_files('d0/file', 50, sync=True) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + # some more IO + self.mount_a.run_shell(["mkdir", "d0/d00"]) + self.mount_a.run_shell(["mkdir", "d0/d01"]) + + self.mount_a.create_n_files('d0/d00/more_file', 20, sync=True) + self.mount_a.create_n_files('d0/d01/some_more_file', 75, sync=True) + + # take another snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap1"]) + + time.sleep(60) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap1', 2) + self.verify_snapshot('d0', 'snap1') + + # delete a snapshot + self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) + + time.sleep(10) + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap0' not in snap_list) + self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 1) + + # rename a snapshot + self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"]) + + time.sleep(10) + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap1' not in snap_list) + self.assertTrue('snap2' in snap_list) + self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 1) + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_cancel_sync(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), 
self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + for i in range(8): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(10) + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap0' not in snap_list) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_restart_sync_on_blocklist(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + for i in range(8): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(10) + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # check if the rados addr is blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + time.sleep(500) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1) + self.verify_snapshot('d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_failed_sync_with_correction(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + 
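The blocklist test above and the failure-correction test being set up here both key off the per-directory state exposed by 'fs mirror peer status'. A standalone sketch of that query, with the same assumed socket path as earlier and a placeholder peer UUID:

    import json
    import subprocess

    ASOK = "/var/run/ceph/cephfs-mirror.asok"   # assumed default socket path

    def peer_status(fs_name, fs_id, peer_uuid):
        # per-directory sync state for one peer, as reported by the mirror daemon
        out = subprocess.check_output(
            ["ceph", "--admin-daemon", ASOK,
             "fs", "mirror", "peer", "status", f"{fs_name}@{fs_id}", peer_uuid])
        return json.loads(out)

    def dir_sync_summary(fs_name, fs_id, peer_uuid, dir_name):
        # returns (state, last synced snapshot name, number of snapshots synced);
        # 'state' is 'syncing' while a transfer is running and 'failed' when the
        # directory could not be synchronized
        entry = peer_status(fs_name, fs_id, peer_uuid).get(dir_name, {})
        return (entry.get("state"),
                entry.get("last_synced_snap", {}).get("name"),
                entry.get("snaps_synced"))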
self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # add a non-existent directory for synchronization + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + # wait for mirror daemon to mark it the directory as failed + time.sleep(120) + self.verify_failed_directory(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0') + + # create the directory + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + # wait for correction + time.sleep(120) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_service_daemon_status(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + time.sleep(30) + status = self.get_mirror_daemon_status() + + # assumption for this test: mirroring enabled for a single filesystem w/ single + # peer + + # we have not added any directories + peer = status['filesystems'][0]['peers'][0] + self.assertEquals(status['filesystems'][0]['directory_count'], 0) + self.assertEquals(peer['stats']['failure_count'], 0) + self.assertEquals(peer['stats']['recovery_count'], 0) + + # add a non-existent directory for synchronization -- check if its reported + # in daemon stats + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + time.sleep(120) + status = self.get_mirror_daemon_status() + # we added one + peer = status['filesystems'][0]['peers'][0] + self.assertEquals(status['filesystems'][0]['directory_count'], 1) + # failure count should be reflected + self.assertEquals(peer['stats']['failure_count'], 1) + self.assertEquals(peer['stats']['recovery_count'], 0) + + # create the directory, mirror daemon would recover + self.mount_a.run_shell(["mkdir", "d0"]) + + time.sleep(120) + status = self.get_mirror_daemon_status() + peer = status['filesystems'][0]['peers'][0] + self.assertEquals(status['filesystems'][0]['directory_count'], 1) + # failure and recovery count should be reflected + self.assertEquals(peer['stats']['failure_count'], 1) + self.assertEquals(peer['stats']['recovery_count'], 1) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirroring_init_failure(self): + """Test mirror daemon init failure""" + + # disable mgr mirroring plugin as it would try to load dir map on + # on mirroring enabled for a filesystem (an throw up erorrs in + # the logs) + self.disable_mirroring_module() + + # enable mirroring through mon interface -- this should result in the mirror daemon + # failing to enable mirroring due to absence of `cephfs_mirorr` index object. 
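The recovery variant of this test (test_mirroring_init_failure_with_recovery, below) clears the failure by creating the missing index object by hand. A condensed sketch of that recovery step, assuming the metadata pool name is known and both the rados and ceph CLIs are available; the 'rados ... create cephfs_mirror' call is the same one used verbatim further down:

    import json
    import subprocess
    import time

    def recover_mirror_daemon(fs_name, fs_id, metadata_pool,
                              asok="/var/run/ceph/cephfs-mirror.asok"):
        # create the index object the daemon failed to find, then wait for it to
        # retry initialization and leave the 'failed' state on its own
        subprocess.check_call(["rados", "-p", metadata_pool, "create", "cephfs_mirror"])
        for _ in range(12):                      # roughly a minute of polling
            time.sleep(5)
            out = subprocess.check_output(
                ["ceph", "--admin-daemon", asok,
                 "fs", "mirror", "status", f"{fs_name}@{fs_id}"])
            if json.loads(out).get("state") != "failed":
                return True
        return False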
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + + with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed: + while proceed(): + try: + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + if not 'state' in res: + return + self.assertTrue(res['state'] == "failed") + return True + except: + pass + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def test_mirroring_init_failure_with_recovery(self): + """Test if the mirror daemon can recover from a init failure""" + + # disable mgr mirroring plugin as it would try to load dir map on + # on mirroring enabled for a filesystem (an throw up erorrs in + # the logs) + self.disable_mirroring_module() + + # enable mirroring through mon interface -- this should result in the mirror daemon + # failing to enable mirroring due to absence of `cephfs_mirror` index object. + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + # need safe_while since non-failed status pops up as mirroring is restarted + # internally in mirror daemon. + with safe_while(sleep=5, tries=20, action='wait for failed state') as proceed: + while proceed(): + try: + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + if not 'state' in res: + return + self.assertTrue(res['state'] == "failed") + return True + except: + pass + + # create the index object and check daemon recovery + try: + p = self.mount_a.client_remote.run(args=['rados', '-p', self.fs.metadata_pool_name, 'create', 'cephfs_mirror'], + stdout=StringIO(), stderr=StringIO(), timeout=30, + check_status=True, label="create index object") + p.wait() + except CommandFailedError as ce: + log.warn(f'mirror daemon command to create mirror index object failed: {ce}') + raise + time.sleep(30) + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + self.assertTrue(res['snap_dirs']['dir_count'] == 0) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def test_cephfs_mirror_peer_bootstrap(self): + """Test importing peer bootstrap token""" + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # create a bootstrap token for the peer + bootstrap_token = self.bootstrap_peer(self.secondary_fs_name, "client.mirror_peer_bootstrap", "site-remote") + + # import the peer via bootstrap token + self.import_peer(self.primary_fs_name, bootstrap_token) + time.sleep(10) + self.verify_peer_added(self.primary_fs_name, 
self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote", + self.secondary_fs_name) + + # verify via peer_list interface + peer_uuid = self.get_peer_uuid("client.mirror_peer_bootstrap@site-remote") + res = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name)) + self.assertTrue(peer_uuid in res) + self.assertTrue('mon_host' in res[peer_uuid] and res[peer_uuid]['mon_host'] != '') + + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote") + # disable mirroring + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_symlink_sync(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files w/ symbolic links in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.create_n_files('d0/file', 10, sync=True) + self.mount_a.run_shell(["ln", "-s", "./file_0", "d0/sym_0"]) + self.mount_a.run_shell(["ln", "-s", "./file_1", "d0/sym_1"]) + self.mount_a.run_shell(["ln", "-s", "./file_2", "d0/sym_2"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_with_parent_snapshot(self): + """Test snapshot synchronization with parent directory snapshots""" + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/d2/d3"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1) + + # create snapshots in parent directories + self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"]) + self.mount_a.run_shell(["mkdir", "d0/d1/.snap/snap_d1"]) + self.mount_a.run_shell(["mkdir", "d0/d1/d2/.snap/snap_d2"]) + + # try syncing more snapshots + self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap1"]) + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2) + + self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"]) + time.sleep(15) + self.check_peer_status_deleted_snap(self.primary_fs_name, 
self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2) + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_remove_on_stall(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # make sure the rados addr is blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + # now we are sure that there are no "active" mirror daemons -- add a directory path. + dir_path_p = "/d0/d1" + dir_path = "/d0/d1/d2" + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path) + + time.sleep(10) + # this uses an undocumented interface to get dirpath map state + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + res = json.loads(res_json) + # there are no mirror daemons + self.assertTrue(res['state'], 'stalled') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path) + + time.sleep(10) + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('invalid errno when checking dirmap status for non-existent directory') + else: + raise RuntimeError('incorrect errno when checking dirmap state for non-existent directory') + + # adding a parent directory should be allowed + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p) + + time.sleep(10) + # however, this directory path should get stalled too + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res = json.loads(res_json) + # there are no mirror daemons + self.assertTrue(res['state'], 'stalled') + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # wait for restart mirror on blocklist + time.sleep(60) + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res = json.loads(res_json) + # there are no mirror daemons + self.assertTrue(res['state'], 'mapped') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_incremental_sync(self): + """ Test incremental snapshot synchronization (based on mtime differences).""" + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + log.debug(f'mounting filesystem 
{self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + repo = 'ceph-qa-suite' + repo_dir = 'ceph_repo' + repo_path = f'{repo_dir}/{repo}' + + def clone_repo(): + self.mount_a.run_shell([ + 'git', 'clone', '--branch', 'giant', + f'http://github.com/ceph/{repo}', repo_path]) + + def exec_git_cmd(cmd_list): + self.mount_a.run_shell(['git', '--git-dir', f'{self.mount_a.mountpoint}/{repo_path}/.git', *cmd_list]) + + self.mount_a.run_shell(["mkdir", repo_dir]) + clone_repo() + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) + + # full copy, takes time + time.sleep(500) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) + self.verify_snapshot(repo_path, 'snap_a') + + # create some diff + num = random.randint(5, 20) + log.debug(f'resetting to HEAD~{num}') + exec_git_cmd(["reset", "--hard", f'HEAD~{num}']) + + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_b']) + # incremental copy, should be fast + time.sleep(180) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) + self.verify_snapshot(repo_path, 'snap_b') + + # diff again, this time back to HEAD + log.debug('resetting to HEAD') + exec_git_cmd(["pull"]) + + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_c']) + # incremental copy, should be fast + time.sleep(180) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_c', 3) + self.verify_snapshot(repo_path, 'snap_c') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_incremental_sync_with_type_mixup(self): + """ Test incremental snapshot synchronization with file type changes. + + The same filename exist as a different type in subsequent snapshot. + This verifies if the mirror daemon can identify file type mismatch and + sync snapshots. 
+ + \ snap_0 snap_1 snap_2 snap_3 + \----------------------------------------------- + file_x | reg sym dir reg + | + file_y | dir reg sym dir + | + file_z | sym dir reg sym + """ + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + typs = deque(['reg', 'dir', 'sym']) + def cleanup_and_create_with_type(dirname, fnames): + self.mount_a.run_shell_payload(f"rm -rf {dirname}/*") + fidx = 0 + for t in typs: + fname = f'{dirname}/{fnames[fidx]}' + log.debug(f'file: {fname} type: {t}') + if t == 'reg': + self.mount_a.run_shell(["touch", fname]) + self.mount_a.write_file(fname, data=fname) + elif t == 'dir': + self.mount_a.run_shell(["mkdir", fname]) + elif t == 'sym': + # verify ELOOP in mirror daemon + self.mount_a.run_shell(["ln", "-s", "..", fname]) + fidx += 1 + + def verify_types(dirname, fnames, snap_name): + tidx = 0 + for fname in fnames: + t = self.mount_b.run_shell_payload(f"stat -c %F {dirname}/.snap/{snap_name}/{fname}").stdout.getvalue().strip() + if typs[tidx] == 'reg': + self.assertEquals('regular file', t) + elif typs[tidx] == 'dir': + self.assertEquals('directory', t) + elif typs[tidx] == 'sym': + self.assertEquals('symbolic link', t) + tidx += 1 + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.mount_a.run_shell(["mkdir", "d0"]) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + fnames = ['file_x', 'file_y', 'file_z'] + turns = 0 + while turns != len(typs): + snapname = f'snap_{turns}' + cleanup_and_create_with_type('d0', fnames) + self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}']) + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', snapname, turns+1) + verify_types('d0', fnames, snapname) + # next type + typs.rotate(1) + turns += 1 + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_sync_with_purged_snapshot(self): + """Test snapshot synchronization in midst of snapshot deletes. + + Deleted the previous snapshot when the mirror daemon is figuring out + incremental differences between current and previous snaphot. The + mirror daemon should identify the purge and switch to using remote + comparison to sync the snapshot (in the next iteration of course). 
+ """ + + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + repo = 'ceph-qa-suite' + repo_dir = 'ceph_repo' + repo_path = f'{repo_dir}/{repo}' + + def clone_repo(): + self.mount_a.run_shell([ + 'git', 'clone', '--branch', 'giant', + f'http://github.com/ceph/{repo}', repo_path]) + + def exec_git_cmd(cmd_list): + self.mount_a.run_shell(['git', '--git-dir', f'{self.mount_a.mountpoint}/{repo_path}/.git', *cmd_list]) + + self.mount_a.run_shell(["mkdir", repo_dir]) + clone_repo() + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) + + # full copy, takes time + time.sleep(500) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) + self.verify_snapshot(repo_path, 'snap_a') + + # create some diff + num = random.randint(60, 100) + log.debug(f'resetting to HEAD~{num}') + exec_git_cmd(["reset", "--hard", f'HEAD~{num}']) + + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_b']) + + time.sleep(15) + self.mount_a.run_shell(['rmdir', f'{repo_path}/.snap/snap_a']) + + # incremental copy but based on remote dir_root + time.sleep(300) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) + self.verify_snapshot(repo_path, 'snap_b') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_peer_add_primary(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # try adding the primary file system as a peer to secondary file + # system + try: + self.peer_add(self.secondary_fs_name, self.secondary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError('invalid errno when adding a primary file system') + else: + raise RuntimeError('adding peer should fail') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_cancel_mirroring_and_readd(self): + """ + Test adding a directory path for synchronization post removal of already added directory paths + + ... to ensure that synchronization of the newly added directory path functions + as expected. Note that we schedule three (3) directories for mirroring to ensure + that all replayer threads (3 by default) in the mirror daemon are busy. 
+ """ + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.run_shell(["mkdir", "d1"]) + self.mount_a.run_shell(["mkdir", "d2"]) + for i in range(4): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + self.mount_a.write_n_mb(os.path.join('d1', filename), 1024) + self.mount_a.write_n_mb(os.path.join('d2', filename), 1024) + + log.debug('enabling mirroring') + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + log.debug('adding directory paths') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take snapshots + log.debug('taking snapshots') + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"]) + + time.sleep(10) + log.debug('checking snap in progress') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d1', 'snap0') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d2', 'snap0') + + log.debug('removing directories 1') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + log.debug('removing directories 2') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + log.debug('removing directories 3') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + + log.debug('removing snapshots') + self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d2/.snap/snap0"]) + + for i in range(4): + filename = f'file.{i}' + log.debug(f'deleting {filename}') + self.mount_a.run_shell(["rm", "-f", os.path.join('d0', filename)]) + self.mount_a.run_shell(["rm", "-f", os.path.join('d1', filename)]) + self.mount_a.run_shell(["rm", "-f", os.path.join('d2', filename)]) + + log.debug('creating new files...') + self.mount_a.create_n_files('d0/file', 50, sync=True) + self.mount_a.create_n_files('d1/file', 50, sync=True) + self.mount_a.create_n_files('d2/file', 50, sync=True) + + log.debug('adding directory paths') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + + log.debug('creating new snapshots...') + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"]) + + time.sleep(60) + 
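The fixed time.sleep(60) above, like most of the sleeps in this file, could also be expressed as a poll. A sketch of such a helper built on teuthology's safe_while, assuming the mirror_daemon_command and get_peer_uuid helpers defined earlier in this class (wait_for_synced_snap itself is illustrative, not an existing method):

    from teuthology.contextutil import safe_while

    def wait_for_synced_snap(self, fs_name, fs_id, peer_spec, dir_name,
                             snap_name, timeout=120):
        # poll the peer status until dir_name reports snap_name as its last
        # synced snapshot; safe_while raises MaxWhileTries if that never happens
        peer_uuid = self.get_peer_uuid(peer_spec)
        with safe_while(sleep=5, tries=timeout // 5,
                        action=f'wait for {dir_name}@{snap_name} sync') as proceed:
            while proceed():
                res = self.mirror_daemon_command(
                    f'peer status for fs: {fs_name}',
                    'fs', 'mirror', 'peer', 'status',
                    f'{fs_name}@{fs_id}', peer_uuid)
                if res.get(dir_name, {}).get('last_synced_snap', {}).get('name') == snap_name:
                    return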
self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d1', 'snap0', 1) + self.verify_snapshot('d1', 'snap0') + + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d2', 'snap0', 1) + self.verify_snapshot('d2', 'snap0') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_local_and_remote_dir_root_mode(self): + log.debug('reconfigure client auth caps') + cid = self.mount_b.client_id + data_pool = self.backup_fs.get_data_pool_name() + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', f"client.{cid}", + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', f"allow rw pool={data_pool}, allow rw pool={data_pool}") + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + self.mount_a.run_shell(["mkdir", "l1"]) + self.mount_a.run_shell(["mkdir", "l1/.snap/snap0"]) + self.mount_a.run_shell(["chmod", "go-rwx", "l1"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/l1') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + time.sleep(60) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/l1', 'snap0', 1) + + mode_local = self.mount_a.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() + mode_remote = self.mount_b.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() + + self.assertTrue(mode_local == mode_remote, f"mode mismatch, local mode: {mode_local}, remote mode: {mode_remote}") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rmdir", "l1/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "l1"]) diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py new file mode 100644 index 000000000..8b48dee69 --- /dev/null +++ b/qa/tasks/cephfs/test_misc.py @@ -0,0 +1,640 @@ +from io import StringIO + +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from textwrap import dedent +from threading import Thread +import errno +import platform +import time +import json +import logging +import os +import re + +log = logging.getLogger(__name__) + +class TestMisc(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + def test_statfs_on_deleted_fs(self): + """ + That statfs does not cause monitors to SIGSEGV after fs deletion. + """ + + self.mount_b.umount_wait() + self.mount_a.run_shell_payload("stat -f .") + self.fs.delete_all_filesystems() + # This will hang either way, run in background. 
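+ # check_status=False prevents the eventual failure from raising once
+ # the mount is forcibly torn down below.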
+ p = self.mount_a.run_shell_payload("stat -f .", wait=False, timeout=60, check_status=False) + time.sleep(30) + self.assertFalse(p.finished) + # the process is stuck in uninterruptible sleep, just kill the mount + self.mount_a.umount_wait(force=True) + p.wait() + + def test_fuse_mount_on_already_mounted_path(self): + if platform.system() != "Linux": + self.skipTest("Require Linux platform") + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client") + + # Try to mount already mounted path + # expecting EBUSY error + try: + mount_cmd = ['sudo'] + self.mount_a._mount_bin + [self.mount_a.hostfs_mntpt] + self.mount_a.client_remote.run(args=mount_cmd, stderr=StringIO(), + stdout=StringIO(), timeout=60, omit_sudo=False) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EBUSY) + else: + self.fail("Expected EBUSY") + + def test_getattr_caps(self): + """ + Check if MDS recognizes the 'mask' parameter of open request. + The parameter allows client to request caps when opening file + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client") + + # Enable debug. Client will requests CEPH_CAP_XATTR_SHARED + # on lookup/open + self.mount_b.umount_wait() + self.set_conf('client', 'client debug getattr caps', 'true') + self.mount_b.mount_wait() + + # create a file and hold it open. MDS will issue CEPH_CAP_EXCL_* + # to mount_a + p = self.mount_a.open_background("testfile") + self.mount_b.wait_for_visible("testfile") + + # this triggers a lookup request and an open request. The debug + # code will check if lookup/open reply contains xattrs + self.mount_b.run_shell(["cat", "testfile"]) + + self.mount_a.kill_background(p) + + def test_root_rctime(self): + """ + Check that the root inode has a non-default rctime on startup. 
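+ The reported rctime should be close to the current time rather than
+ a zero/epoch default.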
+ """ + + t = time.time() + rctime = self.mount_a.getfattr(".", "ceph.dir.rctime") + log.info("rctime = {}".format(rctime)) + self.assertGreaterEqual(float(rctime), t - 10) + + def test_fs_new(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + data_pool_name = self.fs.get_data_pool_name() + + self.fs.fail() + + self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, + '--yes-i-really-mean-it') + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) + + # insert a garbage object + self.fs.radosm(["put", "foo", "-"], stdin=StringIO("bar")) + + def get_pool_df(fs, name): + try: + return fs.get_pool_df(name)['objects'] > 0 + except RuntimeError: + return False + + self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30) + + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + raise AssertionError("Expected EINVAL") + + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, "--force") + + self.fs.mon_manager.raw_cluster_cmd('fs', 'fail', self.fs.name) + + self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, + '--yes-i-really-mean-it') + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, + '--allow_dangerous_metadata_overlay') + + def test_cap_revoke_nonresponder(self): + """ + Check that a client is evicted if it has not responded to cap revoke + request for configured number of seconds. + """ + session_timeout = self.fs.get_var("session_timeout") + eviction_timeout = session_timeout / 2.0 + + self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout', + str(eviction_timeout)]) + + cap_holder = self.mount_a.open_background() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.suspend_netns() + + try: + # The waiter should get stuck waiting for the capability + # held on the MDS by the now-dead client A + cap_waiter = self.mount_b.write_background() + + a = time.time() + time.sleep(eviction_timeout) + cap_waiter.wait() + b = time.time() + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + + # check if the cap is transferred before session timeout kicked in. + # this is a good enough check to ensure that the client got evicted + # by the cap auto evicter rather than transitioning to stale state + # and then getting evicted. 
+ self.assertLess(cap_waited, session_timeout, + "Capability handover took {0}, expected less than {1}".format( + cap_waited, session_timeout + )) + + self.assertTrue(self.mds_cluster.is_addr_blocklisted( + self.mount_a.get_global_addr())) + self.mount_a._kill_background(cap_holder) + finally: + self.mount_a.resume_netns() + + def test_filtered_df(self): + pool_name = self.fs.get_data_pool_name() + raw_df = self.fs.get_pool_df(pool_name) + raw_avail = float(raw_df["max_avail"]) + out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', + pool_name, 'size', + '-f', 'json-pretty') + _ = json.loads(out) + + proc = self.mount_a.run_shell(['df', '.']) + output = proc.stdout.getvalue() + fs_avail = output.split('\n')[1].split()[3] + fs_avail = float(fs_avail) * 1024 + + ratio = raw_avail / fs_avail + assert 0.9 < ratio < 1.1 + + def test_dump_inode(self): + info = self.fs.mds_asok(['dump', 'inode', '1']) + assert(info['path'] == "/") + + def test_dump_inode_hexademical(self): + self.mount_a.run_shell(["mkdir", "-p", "foo"]) + ino = self.mount_a.path_to_ino("foo") + assert type(ino) is int + info = self.fs.mds_asok(['dump', 'inode', hex(ino)]) + assert info['path'] == "/foo" + + def test_fs_lsflags(self): + """ + Check that the lsflags displays the default state and the new state of flags + """ + # Set some flags + self.fs.set_joinable(False) + self.fs.set_allow_new_snaps(False) + self.fs.set_allow_standby_replay(True) + + lsflags = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'lsflags', + self.fs.name, + "--format=json-pretty")) + self.assertEqual(lsflags["joinable"], False) + self.assertEqual(lsflags["allow_snaps"], False) + self.assertEqual(lsflags["allow_multimds_snaps"], True) + self.assertEqual(lsflags["allow_standby_replay"], True) + + def _test_sync_stuck_for_around_5s(self, dir_path, file_sync=False): + self.mount_a.run_shell(["mkdir", dir_path]) + + sync_dir_pyscript = dedent(""" + import os + + path = "{path}" + dfd = os.open(path, os.O_DIRECTORY) + os.fsync(dfd) + os.close(dfd) + """.format(path=dir_path)) + + # run create/delete directories and test the sync time duration + for i in range(300): + for j in range(5): + self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")]) + start = time.time() + if file_sync: + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + else: + self.mount_a.run_shell(["sync"]) + duration = time.time() - start + log.info(f"sync mkdir i = {i}, duration = {duration}") + self.assertLess(duration, 4) + + for j in range(5): + self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")]) + start = time.time() + if file_sync: + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + else: + self.mount_a.run_shell(["sync"]) + duration = time.time() - start + log.info(f"sync rmdir i = {i}, duration = {duration}") + self.assertLess(duration, 4) + + self.mount_a.run_shell(["rm", "-rf", dir_path]) + + def test_filesystem_sync_stuck_for_around_5s(self): + """ + To check whether the fsync will be stuck to wait for the mdlog to be + flushed for at most 5 seconds. + """ + + dir_path = "filesystem_sync_do_not_wait_mdlog_testdir" + self._test_sync_stuck_for_around_5s(dir_path) + + def test_file_sync_stuck_for_around_5s(self): + """ + To check whether the filesystem sync will be stuck to wait for the + mdlog to be flushed for at most 5 seconds. 
+ """ + + dir_path = "file_sync_do_not_wait_mdlog_testdir" + self._test_sync_stuck_for_around_5s(dir_path, True) + + def test_file_filesystem_sync_crash(self): + """ + To check whether the kernel crashes when doing the file/filesystem sync. + """ + + stop_thread = False + dir_path = "file_filesystem_sync_crash_testdir" + self.mount_a.run_shell(["mkdir", dir_path]) + + def mkdir_rmdir_thread(mount, path): + #global stop_thread + + log.info(" mkdir_rmdir_thread starting...") + num = 0 + while not stop_thread: + n = num + m = num + for __ in range(10): + mount.run_shell(["mkdir", os.path.join(path, f"{n}")]) + n += 1 + for __ in range(10): + mount.run_shell(["rm", "-rf", os.path.join(path, f"{m}")]) + m += 1 + num += 10 + log.info(" mkdir_rmdir_thread stopped") + + def filesystem_sync_thread(mount, path): + #global stop_thread + + log.info(" filesystem_sync_thread starting...") + while not stop_thread: + mount.run_shell(["sync"]) + log.info(" filesystem_sync_thread stopped") + + def file_sync_thread(mount, path): + #global stop_thread + + log.info(" file_sync_thread starting...") + pyscript = dedent(""" + import os + + path = "{path}" + dfd = os.open(path, os.O_DIRECTORY) + os.fsync(dfd) + os.close(dfd) + """.format(path=path)) + + while not stop_thread: + mount.run_shell(['python3', '-c', pyscript]) + log.info(" file_sync_thread stopped") + + td1 = Thread(target=mkdir_rmdir_thread, args=(self.mount_a, dir_path,)) + td2 = Thread(target=filesystem_sync_thread, args=(self.mount_a, dir_path,)) + td3 = Thread(target=file_sync_thread, args=(self.mount_a, dir_path,)) + + td1.start() + td2.start() + td3.start() + time.sleep(1200) # run 20 minutes + stop_thread = True + td1.join() + td2.join() + td3.join() + self.mount_a.run_shell(["rm", "-rf", dir_path]) + + def test_dump_inmemory_log_on_client_eviction(self): + """ + That the in-memory logs are dumped during a client eviction event. + """ + self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) + self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) + mount_a_client_id = self.mount_a.get_global_id() + infos = self.fs.status().get_ranks(self.fs.id) + + #evict the client + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + time.sleep(10) #wait for 10 seconds for the logs dumping to complete. + + #The client is evicted, so unmount it. + try: + self.mount_a.umount_wait(require_clean=True, timeout=30) + except: + pass #continue with grepping the log + + eviction_log = f"Evicting (\(and blocklisting\) )?client session {mount_a_client_id} \(.+:.+/.+\)" + search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p" + for info in infos: + mds_id = info['name'] + try: + remote = self.fs.mon_manager.find_remote('mds', mds_id) + out = remote.run(args=["sed", + "-n", + "{0}".format(search_range), + f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"], + stdout=StringIO(), timeout=30) + except: + continue #continue with the next info + if out.stdout and re.search(eviction_log, out.stdout.getvalue().strip()): + return + self.assertTrue(False, "Failed to dump in-memory logs during client eviction") + + def test_dump_inmemory_log_on_missed_beacon_ack_from_monitors(self): + """ + That the in-memory logs are dumped when the mds misses beacon ACKs from monitors. 
+ """ + self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) + self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) + try: + mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + except: + self.assertTrue(False, "Error fetching monitors") + + #Freeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending STOP to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 19) + + time.sleep(10) #wait for 10 seconds to get the in-memory logs dumped + + #Unfreeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending CONT to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 18) + + missed_beacon_ack_log = "missed beacon ack from the monitors" + search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p" + for info in self.fs.status().get_ranks(self.fs.id): + mds_id = info['name'] + try: + remote = self.fs.mon_manager.find_remote('mds', mds_id) + out = remote.run(args=["sed", + "-n", + "{0}".format(search_range), + f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"], + stdout=StringIO(), timeout=30) + except: + continue #continue with the next info + if out.stdout and (missed_beacon_ack_log in out.stdout.getvalue().strip()): + return + self.assertTrue(False, "Failed to dump in-memory logs during missed beacon ack") + + def test_dump_inmemory_log_on_missed_internal_heartbeats(self): + """ + That the in-memory logs are dumped when the mds misses internal heartbeats. + """ + self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) + self.fs.mds_asok(['config', 'set', 'mds_heartbeat_grace', '1']) + self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) + try: + mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + except: + self.assertTrue(False, "Error fetching monitors") + + #Freeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending STOP to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 19) + + time.sleep(10) #wait for 10 seconds to get the in-memory logs dumped + + #Unfreeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending CONT to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 18) + + missed_internal_heartbeat_log = \ + "Skipping beacon heartbeat to monitors \(last acked .+s ago\); MDS internal heartbeat is not healthy!" 
+ search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p" + for info in self.fs.status().get_ranks(self.fs.id): + mds_id = info['name'] + try: + remote = self.fs.mon_manager.find_remote('mds', mds_id) + out = remote.run(args=["sed", + "-n", + "{0}".format(search_range), + f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"], + stdout=StringIO(), timeout=30) + except: + continue #continue with the next info + if out.stdout and re.search(missed_internal_heartbeat_log, out.stdout.getvalue().strip()): + return + self.assertTrue(False, "Failed to dump in-memory logs during missed internal heartbeat") + + def _session_client_ls(self, cmd): + mount_a_client_id = self.mount_a.get_global_id() + info = self.fs.rank_asok(cmd) + mount_a_mountpoint = self.mount_a.mountpoint + mount_b_mountpoint = self.mount_b.mountpoint + self.assertIsNotNone(info) + for i in range(0, len(info)): + self.assertIn(info[i]["client_metadata"]["mount_point"], + [mount_a_mountpoint, mount_b_mountpoint]) + info = self.fs.rank_asok(cmd + [f"id={mount_a_client_id}"]) + self.assertEqual(len(info), 1) + self.assertEqual(info[0]["id"], mount_a_client_id) + self.assertEqual(info[0]["client_metadata"]["mount_point"], mount_a_mountpoint) + info = self.fs.rank_asok(cmd + ['--cap_dump']) + for i in range(0, len(info)): + self.assertIn("caps", info[i]) + + def test_session_ls(self): + self._session_client_ls(['session', 'ls']) + + def test_client_ls(self): + self._session_client_ls(['client', 'ls']) + +class TestCacheDrop(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + def _run_drop_cache_cmd(self, timeout=None): + result = None + args = ["cache", "drop"] + if timeout is not None: + args.append(str(timeout)) + result = self.fs.rank_tell(args) + return result + + def _setup(self, max_caps=20, threshold=400): + # create some files + self.mount_a.create_n_files("dc-dir/dc-file", 1000, sync=True) + + # Reduce this so the MDS doesn't rkcall the maximum for simple tests + self.fs.rank_asok(['config', 'set', 'mds_recall_max_caps', str(max_caps)]) + self.fs.rank_asok(['config', 'set', 'mds_recall_max_decay_threshold', str(threshold)]) + + def test_drop_cache_command(self): + """ + Basic test for checking drop cache command. + Confirm it halts without a timeout. + Note that the cache size post trimming is not checked here. + """ + mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client")) + self._setup() + result = self._run_drop_cache_cmd() + self.assertEqual(result['client_recall']['return_code'], 0) + self.assertEqual(result['flush_journal']['return_code'], 0) + # It should take at least 1 second + self.assertGreater(result['duration'], 1) + self.assertGreaterEqual(result['trim_cache']['trimmed'], 1000-2*mds_min_caps_per_client) + + def test_drop_cache_command_timeout(self): + """ + Basic test for checking drop cache command. + Confirm recall halts early via a timeout. + Note that the cache size post trimming is not checked here. + """ + self._setup() + result = self._run_drop_cache_cmd(timeout=10) + self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT) + self.assertEqual(result['flush_journal']['return_code'], 0) + self.assertGreater(result['duration'], 10) + self.assertGreaterEqual(result['trim_cache']['trimmed'], 100) # we did something, right? + + def test_drop_cache_command_dead_timeout(self): + """ + Check drop cache command with non-responding client using tell + interface. Note that the cache size post trimming is not checked + here. 
+ """ + self._setup() + self.mount_a.suspend_netns() + # Note: recall is subject to the timeout. The journal flush will + # be delayed due to the client being dead. + result = self._run_drop_cache_cmd(timeout=5) + self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT) + self.assertEqual(result['flush_journal']['return_code'], 0) + self.assertGreater(result['duration'], 5) + self.assertLess(result['duration'], 120) + # Note: result['trim_cache']['trimmed'] may be >0 because dropping the + # cache now causes the Locker to drive eviction of stale clients (a + # stale session will be autoclosed at mdsmap['session_timeout']). The + # particular operation causing this is journal flush which causes the + # MDS to wait wait for cap revoke. + #self.assertEqual(0, result['trim_cache']['trimmed']) + self.mount_a.resume_netns() + + def test_drop_cache_command_dead(self): + """ + Check drop cache command with non-responding client using tell + interface. Note that the cache size post trimming is not checked + here. + """ + self._setup() + self.mount_a.suspend_netns() + result = self._run_drop_cache_cmd() + self.assertEqual(result['client_recall']['return_code'], 0) + self.assertEqual(result['flush_journal']['return_code'], 0) + self.assertGreater(result['duration'], 5) + self.assertLess(result['duration'], 120) + # Note: result['trim_cache']['trimmed'] may be >0 because dropping the + # cache now causes the Locker to drive eviction of stale clients (a + # stale session will be autoclosed at mdsmap['session_timeout']). The + # particular operation causing this is journal flush which causes the + # MDS to wait wait for cap revoke. + self.mount_a.resume_netns() + +class TestSkipReplayInoTable(CephFSTestCase): + MDSS_REQUIRED = 1 + CLIENTS_REQUIRED = 1 + + def test_alloc_cinode_assert(self): + """ + Test alloc CInode assert. + + See: https://tracker.ceph.com/issues/52280 + """ + + # Create a directory and the mds will journal this and then crash + self.mount_a.run_shell(["rm", "-rf", "test_alloc_ino"]) + self.mount_a.run_shell(["mkdir", "test_alloc_ino"]) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + + self.fs.mds_asok(['config', 'set', 'mds_kill_skip_replaying_inotable', "true"]) + # This will make the MDS crash, since we only have one MDS in the + # cluster and without the "wait=False" it will stuck here forever. + self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir1"], wait=False) + + # sleep 10 seconds to make sure the journal logs are flushed and + # the mds crashes + time.sleep(10) + + # Now set the mds config to skip replaying the inotable + self.fs.set_ceph_conf('mds', 'mds_inject_skip_replaying_inotable', True) + self.fs.set_ceph_conf('mds', 'mds_wipe_sessions', True) + + self.fs.mds_restart() + # sleep 5 seconds to make sure the mds tell command won't stuck + time.sleep(5) + self.fs.wait_for_daemons() + + self.delete_mds_coredump(rank0['name']); + + self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir2"]) + + ls_out = set(self.mount_a.ls("test_alloc_ino/")) + self.assertEqual(ls_out, set({"dir1", "dir2"})) diff --git a/qa/tasks/cephfs/test_multifs_auth.py b/qa/tasks/cephfs/test_multifs_auth.py new file mode 100644 index 000000000..c9ea5f528 --- /dev/null +++ b/qa/tasks/cephfs/test_multifs_auth.py @@ -0,0 +1,297 @@ +""" +Test for Ceph clusters with multiple FSs. 
+""" +import logging + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.caps_helper import CapTester + +from teuthology.exceptions import CommandFailedError + + +log = logging.getLogger(__name__) + + +class TestMultiFS(CephFSTestCase): + client_id = 'testuser' + client_name = 'client.' + client_id + # one dedicated for each FS + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 2 + + def setUp(self): + super(TestMultiFS, self).setUp() + + self.captester = CapTester() + + # we might have it - the client - if the same cluster was used for a + # different vstart_runner.py run. + self.run_cluster_cmd(f'auth rm {self.client_name}') + + self.fs1 = self.fs + self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) + + # we'll reassign caps to client.1 so that it can operate with cephfs2 + self.run_cluster_cmd(f'auth caps client.{self.mount_b.client_id} mon ' + f'"allow r" osd "allow rw ' + f'pool={self.fs2.get_data_pool_name()}" mds allow') + self.mount_b.remount(cephfs_name=self.fs2.name) + + +class TestMONCaps(TestMultiFS): + + def test_moncap_with_one_fs_names(self): + moncap = f'allow r fsname={self.fs1.name}' + self.create_client(self.client_id, moncap) + + self.captester.run_mon_cap_tests(self.fs1, self.client_id) + + def test_moncap_with_multiple_fs_names(self): + moncap = (f'allow r fsname={self.fs1.name}, ' + f'allow r fsname={self.fs2.name}') + self.create_client(self.client_id, moncap) + + self.captester.run_mon_cap_tests(self.fs1, self.client_id) + + def test_moncap_with_blanket_allow(self): + moncap = 'allow r' + self.create_client(self.client_id, moncap) + + self.captester.run_mon_cap_tests(self.fs1, self.client_id) + + +#TODO: add tests for capsecs 'p' and 's'. +class TestMDSCaps(TestMultiFS): + """ + 0. Have 2 FSs on Ceph cluster. + 1. Create new files on both FSs. + 2. Create a new client that has authorization for both FSs. + 3. Remount the current mounts with this new client. + 4. Test read and write on both FSs. 
+ """ + def setUp(self): + super(self.__class__, self).setUp() + self.mounts = (self.mount_a, self.mount_b) + + def test_rw_with_fsname_and_no_path_in_cap(self): + PERM = 'rw' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def test_r_with_fsname_and_no_path_in_cap(self): + PERM = 'r' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def test_rw_with_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'rw', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + def test_r_with_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'r', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + # XXX: this tests the backward compatibility; "allow rw path=<dir1>" is + # treated as "allow rw fsname=* path=<dir1>" + def test_rw_with_no_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'rw', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + # XXX: this tests the backward compatibility; "allow r path=<dir1>" is + # treated as "allow r fsname=* path=<dir1>" + def test_r_with_no_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'r', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + def test_rw_with_no_fsname_and_no_path(self): + PERM = 'rw' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def test_r_with_no_fsname_and_no_path(self): + PERM = 'r' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def tearDown(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + super(type(self), self).tearDown() + + def generate_caps(self, perm, fsname, cephfs_mntpt): + moncap = 'allow r' + osdcap = (f'allow {perm} tag cephfs data={self.fs1.name}, ' + f'allow {perm} tag cephfs data={self.fs2.name}') + + if fsname: + if cephfs_mntpt == '/': + mdscap = (f'allow {perm} fsname={self.fs1.name}, ' + f'allow {perm} fsname={self.fs2.name}') + else: + mdscap = (f'allow {perm} fsname={self.fs1.name} ' + f'path=/{cephfs_mntpt}, ' 
+ f'allow {perm} fsname={self.fs2.name} ' + f'path=/{cephfs_mntpt}') + else: + if cephfs_mntpt == '/': + mdscap = f'allow {perm}' + else: + mdscap = f'allow {perm} path=/{cephfs_mntpt}' + + return moncap, osdcap, mdscap + + def _create_client(self, perm, fsname=False, cephfs_mntpt='/'): + moncap, osdcap, mdscap = self.generate_caps(perm, fsname, + cephfs_mntpt) + + keyring = self.create_client(self.client_id, moncap, osdcap, mdscap) + keyring_paths = [] + for mount_x in self.mounts: + keyring_paths.append(mount_x.client_remote.mktemp(data=keyring)) + + return keyring_paths + + def remount_with_new_client(self, keyring_paths, cephfs_mntpt='/'): + if isinstance(cephfs_mntpt, str) and cephfs_mntpt != '/' : + cephfs_mntpt = '/' + cephfs_mntpt + + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_paths[0], + client_remote=self.mount_a.client_remote, + cephfs_name=self.fs1.name, + cephfs_mntpt=cephfs_mntpt, + hostfs_mntpt=self.mount_a.hostfs_mntpt, + wait=True) + self.mount_b.remount(client_id=self.client_id, + client_keyring_path=keyring_paths[1], + client_remote=self.mount_b.client_remote, + cephfs_name=self.fs2.name, + cephfs_mntpt=cephfs_mntpt, + hostfs_mntpt=self.mount_b.hostfs_mntpt, + wait=True) + + +class TestClientsWithoutAuth(TestMultiFS): + + def setUp(self): + super(TestClientsWithoutAuth, self).setUp() + + # TODO: When MON and OSD caps for a Ceph FS are assigned to a + # client but MDS caps are not, mount.ceph prints "permission + # denied". But when MON caps are not assigned and MDS and OSD + # caps are, mount.ceph prints "no mds server or cluster laggy" + # instead of "permission denied". + # + # Before uncommenting the following line a fix would be required + # for latter case to change "no mds server is up or the cluster is + # laggy" to "permission denied". + self.kernel_errmsgs = ('permission denied', 'no mds server is up or ' + 'the cluster is laggy', 'no such file or ' + 'directory', + 'input/output error') + + # TODO: When MON and OSD caps are assigned for a Ceph FS to a + # client but MDS caps are not, ceph-fuse prints "operation not + # permitted". But when MON caps are not assigned and MDS and OSD + # caps are, ceph-fuse prints "no such file or directory" instead + # of "operation not permitted". + # + # Before uncommenting the following line a fix would be required + # for the latter case to change "no such file or directory" to + # "operation not permitted". + #self.assertIn('operation not permitted', retval[2].lower()) + self.fuse_errmsgs = ('operation not permitted', 'no such file or ' + 'directory') + + if 'kernel' in str(type(self.mount_a)).lower(): + self.errmsgs = self.kernel_errmsgs + elif 'fuse' in str(type(self.mount_a)).lower(): + self.errmsgs = self.fuse_errmsgs + else: + raise RuntimeError('strange, the client was neither based on ' + 'kernel nor FUSE.') + + def check_that_mount_failed_for_right_reason(self, stderr): + stderr = stderr.lower() + for errmsg in self.errmsgs: + if errmsg in stderr: + break + else: + raise AssertionError('can\'t find expected set of words in the ' + f'stderr\nself.errmsgs - {self.errmsgs}\n' + f'stderr - {stderr}') + + def test_mount_all_caps_absent(self): + # setup part... + keyring = self.fs1.authorize(self.client_id, ('/', 'rw')) + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + + # mount the FS for which client has no auth... 
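+ # with check_status=False the failed remount returns a 3-tuple instead
+ # of raising; retval[2] carries the mount command's stderr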
+ retval = self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + + # tests... + self.assertIsInstance(retval, tuple) + self.assertEqual(len(retval), 3) + self.assertIsInstance(retval[0], CommandFailedError) + self.check_that_mount_failed_for_right_reason(retval[2]) + + def test_mount_mon_and_osd_caps_present_mds_caps_absent(self): + # setup part... + moncap = f'allow rw fsname={self.fs1.name}, allow rw fsname={self.fs2.name}' + mdscap = f'allow rw fsname={self.fs1.name}' + osdcap = (f'allow rw tag cephfs data={self.fs1.name}, allow rw tag ' + f'cephfs data={self.fs2.name}') + keyring = self.create_client(self.client_id, moncap, osdcap, mdscap) + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + + # mount the FS for which client has no auth... + retval = self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + + # tests... + self.assertIsInstance(retval, tuple) + self.assertEqual(len(retval), 3) + self.assertIsInstance(retval[0], CommandFailedError) + self.check_that_mount_failed_for_right_reason(retval[2]) diff --git a/qa/tasks/cephfs/test_multimds_misc.py b/qa/tasks/cephfs/test_multimds_misc.py new file mode 100644 index 000000000..2bb6257c7 --- /dev/null +++ b/qa/tasks/cephfs/test_multimds_misc.py @@ -0,0 +1,223 @@ +import logging +import errno +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.contextutil import safe_while +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestScrub2(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def _check_scrub_status(self, result=None, reverse=False): + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=1, + sleep=5, timeout=30, + reverse=reverse), True) + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=2, + sleep=5, timeout=30, + reverse=reverse), True) + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=0, + sleep=5, timeout=30, + reverse=reverse), True) + + def _check_task_status_na(self, timo=120): + """ check absence of scrub status in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + if not active[0] in task_status: + return True + + def _check_task_status(self, expected_status, timo=120): + """ check scrub status for current active mds in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + try: + if task_status[active[0]].startswith(expected_status): + return True + except KeyError: + pass + + def _find_path_inos(self, root_path): + inos = [] + p = self.mount_a.run_shell(["find", root_path]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos.append(self.mount_a.path_to_ino(path)) + return inos + + def _setup_subtrees(self): + self.fs.set_max_mds(3) + self.fs.wait_for_daemons() + status = self.fs.status() + + path = 'd1/d2/d3/d4/d5/d6/d7/d8' + self.mount_a.run_shell(['mkdir', '-p', path]) + self.mount_a.run_shell(['sync', path]) + + self.mount_a.setfattr("d1/d2", "ceph.dir.pin", "0") + 
self.mount_a.setfattr("d1/d2/d3/d4", "ceph.dir.pin", "1") + self.mount_a.setfattr("d1/d2/d3/d4/d5/d6", "ceph.dir.pin", "2") + + self._wait_subtrees([('/d1/d2', 0), ('/d1/d2/d3/d4', 1)], status, 0) + self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 1) + self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 2) + + for rank in range(3): + self.fs.rank_tell(["flush", "journal"], rank) + + def test_apply_tag(self): + self._setup_subtrees() + inos = self._find_path_inos('d1/d2/d3/') + + tag = "tag123" + out_json = self.fs.rank_tell(["tag", "path", "/d1/d2/d3", tag], 0) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def assertTagged(ino): + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["getxattr", file_obj_name, "scrub_tag"]) + + for ino in inos: + assertTagged(ino) + + def test_scrub_backtrace(self): + self._setup_subtrees() + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def _check_damage(mds_rank, inos): + all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) + damage = [d for d in all_damage if d['ino'] in inos and d['damage_type'] == "backtrace"] + return len(damage) >= len(inos) + + self.assertTrue(_check_damage(0, inos[0:2])) + self.assertTrue(_check_damage(1, inos[2:4])) + self.assertTrue(_check_damage(2, inos[4:6])) + + def test_scrub_non_mds0(self): + self._setup_subtrees() + + def expect_exdev(cmd, mds): + try: + self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(mds), *cmd) + except CommandFailedError as e: + if e.exitstatus == errno.EXDEV: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + rank1 = self.fs.get_rank(rank=1) + expect_exdev(["scrub", "start", "/d1/d2/d3"], rank1["name"]) + expect_exdev(["scrub", "abort"], rank1["name"]) + expect_exdev(["scrub", "pause"], rank1["name"]) + expect_exdev(["scrub", "resume"], rank1["name"]) + + def test_scrub_abort_mds0(self): + self._setup_subtrees() + + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], 0) + + # Abort and verify in both mdss. We also check the status in rank 0 mds because + # it is supposed to gather the scrub status from other mdss. 
+ self._check_scrub_status() + + # sleep enough to fetch updated task status + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume_mds0(self): + self._setup_subtrees() + + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + + res = self.fs.run_scrub(["pause"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED") + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + res = self.fs.run_scrub(["resume"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED", reverse=True) + + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume_with_abort_mds0(self): + self._setup_subtrees() + + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + + res = self.fs.run_scrub(["pause"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED") + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED") + self._check_scrub_status(result="0 inodes") + + # scrub status should still be paused... + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + res = self.fs.run_scrub(["resume"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED", reverse=True) + + checked = self._check_task_status_na() + self.assertTrue(checked) diff --git a/qa/tasks/cephfs/test_newops.py b/qa/tasks/cephfs/test_newops.py new file mode 100644 index 000000000..0071cb5d3 --- /dev/null +++ b/qa/tasks/cephfs/test_newops.py @@ -0,0 +1,18 @@ +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestNewOps(CephFSTestCase): + def test_newops_getvxattr(self): + """ + For nautilus it will crash the MDSs when receive unknown OPs, as a workaround + the clients should avoid sending them to nautilus + """ + + log.info("Test for new getvxattr op...") + self.mount_a.run_shell(["mkdir", "newop_getvxattr_dir"]) + + # to test whether will nautilus crash the MDSs + self.mount_a.getfattr("./newop_getvxattr_dir", "ceph.dir.pin.random") + log.info("Test for new getvxattr op succeeds") diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py new file mode 100644 index 000000000..0a10709e6 --- /dev/null +++ b/qa/tasks/cephfs/test_nfs.py @@ -0,0 +1,880 @@ +# NOTE: these tests are not yet compatible with vstart_runner.py. +import errno +import json +import time +import logging +from io import BytesIO, StringIO + +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology import contextutil +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +NFS_POOL_NAME = '.nfs' # should match mgr_module.py + +# TODO Add test for cluster update when ganesha can be deployed on multiple ports. 
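+# These tests drive the nfs mgr module end to end: deploy a ganesha cluster
+# through the orchestrator, create cephfs exports, mount them over NFS, and
+# exercise export listing/info, user config set/reset and cleanup paths.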
+class TestNFS(MgrTestCase): + def _cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _nfs_cmd(self, *args): + return self._cmd("nfs", *args) + + def _nfs_complete_cmd(self, cmd): + return self.mgr_cluster.mon_manager.run_cluster_cmd(args=f"nfs {cmd}", + stdout=StringIO(), + stderr=StringIO(), + check_status=False) + + def _orch_cmd(self, *args): + return self._cmd("orch", *args) + + def _sys_cmd(self, cmd): + ret = self.ctx.cluster.run(args=cmd, check_status=False, stdout=BytesIO(), stderr=BytesIO()) + stdout = ret[0].stdout + if stdout: + return stdout.getvalue() + + def setUp(self): + super(TestNFS, self).setUp() + self._load_module('nfs') + self.cluster_id = "test" + self.export_type = "cephfs" + self.pseudo_path = "/cephfs" + self.path = "/" + self.fs_name = "nfs-cephfs" + self.expected_name = "nfs.test" + self.sample_export = { + "export_id": 1, + "path": self.path, + "cluster_id": self.cluster_id, + "pseudo": self.pseudo_path, + "access_type": "RW", + "squash": "none", + "security_label": True, + "protocols": [ + 4 + ], + "transports": [ + "TCP" + ], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": self.fs_name, + }, + "clients": [] + } + + def _check_nfs_server_status(self): + res = self._sys_cmd(['sudo', 'systemctl', 'status', 'nfs-server']) + if isinstance(res, bytes) and b'Active: active' in res: + self._disable_nfs() + + def _disable_nfs(self): + log.info("Disabling NFS") + self._sys_cmd(['sudo', 'systemctl', 'disable', 'nfs-server', '--now']) + + def _fetch_nfs_daemons_details(self, enable_json=False): + args = ('ps', f'--service_name={self.expected_name}') + if enable_json: + args = (*args, '--format=json') + return self._orch_cmd(*args) + + def _check_nfs_cluster_event(self, expected_event): + ''' + Check whether an event occured during the lifetime of the NFS service + :param expected_event: event that was expected to occur + ''' + event_occurred = False + # Wait few seconds for NFS daemons' status to be updated + with contextutil.safe_while(sleep=10, tries=18, _raise=False) as proceed: + while not event_occurred and proceed(): + daemons_details = json.loads( + self._fetch_nfs_daemons_details(enable_json=True)) + log.info('daemons details %s', daemons_details) + # 'events' key may not exist in the daemon description + # after a mgr fail over and could take some time to appear + # (it's populated on first daemon event) + if 'events' not in daemons_details[0]: + continue + for event in daemons_details[0]['events']: + log.info('daemon event %s', event) + if expected_event in event: + event_occurred = True + break + return event_occurred + + def _check_nfs_cluster_status(self, expected_status, fail_msg): + ''' + Check the current status of the NFS service + :param expected_status: Status to be verified + :param fail_msg: Message to be printed if test failed + ''' + # Wait for a minute as ganesha daemon takes some time to be + # deleted/created + with contextutil.safe_while(sleep=6, tries=10, _raise=False) as proceed: + while proceed(): + if expected_status in self._fetch_nfs_daemons_details(): + return + self.fail(fail_msg) + + def _check_auth_ls(self, export_id=1, check_in=False): + ''' + Tests export user id creation or deletion. 
+ :param export_id: Denotes export number + :param check_in: Check specified export id + ''' + output = self._cmd('auth', 'ls') + client_id = f'client.nfs.{self.cluster_id}' + if check_in: + self.assertIn(f'{client_id}.{export_id}', output) + else: + self.assertNotIn(f'{client_id}.{export_id}', output) + + def _test_idempotency(self, cmd_func, cmd_args): + ''' + Test idempotency of commands. It first runs the TestNFS test method + for a command and then checks the result of command run again. TestNFS + test method has required checks to verify that command works. + :param cmd_func: TestNFS method + :param cmd_args: nfs command arguments to be run + ''' + cmd_func() + ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd_args) + if ret != 0: + self.fail("Idempotency test failed") + + def _test_create_cluster(self): + ''' + Test single nfs cluster deployment. + ''' + with contextutil.safe_while(sleep=4, tries=10) as proceed: + while proceed(): + try: + # Disable any running nfs ganesha daemon + self._check_nfs_server_status() + cluster_create = self._nfs_complete_cmd( + f'cluster create {self.cluster_id}') + if cluster_create.stderr and 'cluster already exists' \ + in cluster_create.stderr.getvalue(): + self._test_delete_cluster() + continue + # Check for expected status and daemon name + # (nfs.<cluster_id>) + self._check_nfs_cluster_status( + 'running', 'NFS Ganesha cluster deployment failed') + break + except (AssertionError, CommandFailedError) as e: + log.warning(f'{e}, retrying') + + def _test_delete_cluster(self): + ''' + Test deletion of a single nfs cluster. + ''' + self._nfs_cmd('cluster', 'rm', self.cluster_id) + self._check_nfs_cluster_status('No daemons reported', + 'NFS Ganesha cluster could not be deleted') + + def _test_list_cluster(self, empty=False): + ''' + Test listing of deployed nfs clusters. If nfs cluster is deployed then + it checks for expected cluster id. Otherwise checks nothing is listed. + :param empty: If true it denotes no cluster is deployed. + ''' + nfs_output = self._nfs_cmd('cluster', 'ls') + jdata = json.loads(nfs_output) + if empty: + self.assertEqual(len(jdata), 0) + else: + cluster_id = self.cluster_id + self.assertEqual([cluster_id], jdata) + + def _create_export(self, export_id, create_fs=False, extra_cmd=None): + ''' + Test creation of a single export. + :param export_id: Denotes export number + :param create_fs: If false filesytem exists. Otherwise create it. + :param extra_cmd: List of extra arguments for creating export. 
+ ''' + if create_fs: + self._cmd('fs', 'volume', 'create', self.fs_name) + with contextutil.safe_while(sleep=5, tries=30) as proceed: + while proceed(): + output = self._cmd( + 'orch', 'ls', '-f', 'json', + '--service-name', f'mds.{self.fs_name}' + ) + j = json.loads(output) + if j[0]['status']['running']: + break + export_cmd = ['nfs', 'export', 'create', 'cephfs', + '--fsname', self.fs_name, '--cluster-id', self.cluster_id] + if isinstance(extra_cmd, list): + export_cmd.extend(extra_cmd) + else: + export_cmd.extend(['--pseudo-path', self.pseudo_path]) + # Runs the nfs export create command + self._cmd(*export_cmd) + # Check if user id for export is created + self._check_auth_ls(export_id, check_in=True) + res = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'get', + f'export-{export_id}', '-']) + # Check if export object is created + if res == b'': + self.fail("Export cannot be created") + + def _create_default_export(self): + ''' + Deploy a single nfs cluster and create export with default options. + ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True) + + def _delete_export(self): + ''' + Delete an export. + ''' + self._nfs_cmd('export', 'rm', self.cluster_id, self.pseudo_path) + self._check_auth_ls() + + def _test_list_export(self): + ''' + Test listing of created exports. + ''' + nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id)) + self.assertIn(self.pseudo_path, nfs_output) + + def _test_list_detailed(self, sub_vol_path): + ''' + Test listing of created exports with detailed option. + :param sub_vol_path: Denotes path of subvolume + ''' + nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id, '--detailed')) + # Export-1 with default values (access type = rw and path = '\') + self.assertDictEqual(self.sample_export, nfs_output[0]) + # Export-2 with r only + self.sample_export['export_id'] = 2 + self.sample_export['pseudo'] = self.pseudo_path + '1' + self.sample_export['access_type'] = 'RO' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.2' + self.assertDictEqual(self.sample_export, nfs_output[1]) + # Export-3 for subvolume with r only + self.sample_export['export_id'] = 3 + self.sample_export['path'] = sub_vol_path + self.sample_export['pseudo'] = self.pseudo_path + '2' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.3' + self.assertDictEqual(self.sample_export, nfs_output[2]) + # Export-4 for subvolume + self.sample_export['export_id'] = 4 + self.sample_export['pseudo'] = self.pseudo_path + '3' + self.sample_export['access_type'] = 'RW' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.4' + self.assertDictEqual(self.sample_export, nfs_output[3]) + + def _get_export(self): + ''' + Returns export block in json format + ''' + return json.loads(self._nfs_cmd('export', 'info', self.cluster_id, self.pseudo_path)) + + def _test_get_export(self): + ''' + Test fetching of created export. + ''' + nfs_output = self._get_export() + self.assertDictEqual(self.sample_export, nfs_output) + + def _check_export_obj_deleted(self, conf_obj=False): + ''' + Test if export or config object are deleted successfully. 
+ :param conf_obj: It denotes config object needs to be checked + ''' + rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls']) + + if b'export-' in rados_obj_ls or (conf_obj and b'conf-nfs' in rados_obj_ls): + self.fail("Delete export failed") + + def _get_port_ip_info(self): + ''' + Return port and ip for a cluster + ''' + #{'test': {'backend': [{'hostname': 'smithi068', 'ip': '172.21.15.68', + #'port': 2049}]}} + with contextutil.safe_while(sleep=5, tries=6) as proceed: + while proceed(): + try: + info_output = json.loads( + self._nfs_cmd('cluster', 'info', + self.cluster_id))['test']['backend'][0] + return info_output["port"], info_output["ip"] + except (IndexError, CommandFailedError) as e: + if 'list index out of range' in str(e): + log.warning('no port and/or ip found, retrying') + else: + log.warning(f'{e}, retrying') + + def _test_mnt(self, pseudo_path, port, ip, check=True): + ''' + Test mounting of created exports + :param pseudo_path: It is the pseudo root name + :param port: Port of deployed nfs cluster + :param ip: IP of deployed nfs cluster + :param check: It denotes if i/o testing needs to be done + ''' + tries = 3 + while True: + try: + self.ctx.cluster.run( + args=['sudo', 'mount', '-t', 'nfs', '-o', f'port={port}', + f'{ip}:{pseudo_path}', '/mnt']) + break + except CommandFailedError as e: + if tries: + tries -= 1 + time.sleep(2) + continue + # Check if mount failed only when non existing pseudo path is passed + if not check and e.exitstatus == 32: + return + raise + + self.ctx.cluster.run(args=['sudo', 'chmod', '1777', '/mnt']) + + try: + self.ctx.cluster.run(args=['touch', '/mnt/test']) + out_mnt = self._sys_cmd(['ls', '/mnt']) + self.assertEqual(out_mnt, b'test\n') + finally: + self.ctx.cluster.run(args=['sudo', 'umount', '/mnt']) + + def _write_to_read_only_export(self, pseudo_path, port, ip): + ''' + Check if write to read only export fails + ''' + try: + self._test_mnt(pseudo_path, port, ip) + except CommandFailedError as e: + # Write to cephfs export should fail for test to pass + self.assertEqual( + e.exitstatus, errno.EPERM, + 'invalid error code on trying to write to read-only export') + else: + self.fail('expected write to a read-only export to fail') + + def _create_cluster_with_fs(self, fs_name, mnt_pt=None): + """ + create a cluster along with fs and mount it to the path supplied + :param fs_name: name of CephFS volume to be created + :param mnt_pt: mount fs to the path + """ + self._test_create_cluster() + self._cmd('fs', 'volume', 'create', fs_name) + with contextutil.safe_while(sleep=5, tries=30) as proceed: + while proceed(): + output = self._cmd( + 'orch', 'ls', '-f', 'json', + '--service-name', f'mds.{fs_name}' + ) + j = json.loads(output) + if j[0]['status']['running']: + break + if mnt_pt: + with contextutil.safe_while(sleep=3, tries=3) as proceed: + while proceed(): + try: + self.ctx.cluster.run(args=['sudo', 'ceph-fuse', mnt_pt]) + break + except CommandFailedError as e: + log.warning(f'{e}, retrying') + self.ctx.cluster.run(args=['sudo', 'chmod', '1777', mnt_pt]) + + def _delete_cluster_with_fs(self, fs_name, mnt_pt=None, mode=None): + """ + delete cluster along with fs and unmount it from the path supplied + :param fs_name: name of CephFS volume to be deleted + :param mnt_pt: unmount fs from the path + :param mode: revert to this mode + """ + if mnt_pt: + self.ctx.cluster.run(args=['sudo', 'umount', mnt_pt]) + if mode: + if isinstance(mode, bytes): + mode = mode.decode().strip() + 
self.ctx.cluster.run(args=['sudo', 'chmod', mode, mnt_pt]) + self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_create_and_delete_cluster(self): + ''' + Test successful creation and deletion of the nfs cluster. + ''' + self._test_create_cluster() + self._test_list_cluster() + self._test_delete_cluster() + # List clusters again to ensure no cluster is shown + self._test_list_cluster(empty=True) + + def test_create_delete_cluster_idempotency(self): + ''' + Test idempotency of cluster create and delete commands. + ''' + self._test_idempotency(self._test_create_cluster, ['nfs', 'cluster', 'create', self.cluster_id]) + self._test_idempotency(self._test_delete_cluster, ['nfs', 'cluster', 'rm', self.cluster_id]) + + def test_create_cluster_with_invalid_cluster_id(self): + ''' + Test nfs cluster deployment failure with invalid cluster id. + ''' + try: + invalid_cluster_id = '/cluster_test' # Only [A-Za-z0-9-_.] chars are valid + self._nfs_cmd('cluster', 'create', invalid_cluster_id) + self.fail(f"Cluster successfully created with invalid cluster id {invalid_cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + def test_create_and_delete_export(self): + ''' + Test successful creation and deletion of the cephfs export. + ''' + self._create_default_export() + self._test_get_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._delete_export() + # Check if rados export object is deleted + self._check_export_obj_deleted() + self._test_mnt(self.pseudo_path, port, ip, False) + self._test_delete_cluster() + + def test_create_delete_export_idempotency(self): + ''' + Test idempotency of export create and delete commands. + ''' + self._test_idempotency(self._create_default_export, [ + 'nfs', 'export', 'create', 'cephfs', + '--fsname', self.fs_name, '--cluster-id', self.cluster_id, + '--pseudo-path', self.pseudo_path]) + self._test_idempotency(self._delete_export, ['nfs', 'export', 'rm', self.cluster_id, + self.pseudo_path]) + self._test_delete_cluster() + + def test_create_multiple_exports(self): + ''' + Test creating multiple exports with different access type and path. + ''' + # Export-1 with default values (access type = rw and path = '\') + self._create_default_export() + # Export-2 with r only + self._create_export(export_id='2', + extra_cmd=['--pseudo-path', self.pseudo_path+'1', '--readonly']) + # Export-3 for subvolume with r only + self._cmd('fs', 'subvolume', 'create', self.fs_name, 'sub_vol') + fs_path = self._cmd('fs', 'subvolume', 'getpath', self.fs_name, 'sub_vol').strip() + self._create_export(export_id='3', + extra_cmd=['--pseudo-path', self.pseudo_path+'2', '--readonly', + '--path', fs_path]) + # Export-4 for subvolume + self._create_export(export_id='4', + extra_cmd=['--pseudo-path', self.pseudo_path+'3', + '--path', fs_path]) + # Check if exports gets listed + self._test_list_detailed(fs_path) + self._test_delete_cluster() + # Check if rados ganesha conf object is deleted + self._check_export_obj_deleted(conf_obj=True) + self._check_auth_ls() + + def test_exports_on_mgr_restart(self): + ''' + Test export availability on restarting mgr. 
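+ The cephadm module is unloaded and reloaded to restart the mgr, after
+ which the export should still be listed and mountable.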
+ ''' + self._create_default_export() + # unload and load module will restart the mgr + self._unload_module("cephadm") + self._load_module("cephadm") + self._orch_cmd("set", "backend", "cephadm") + # Check if ganesha daemon is running + self._check_nfs_cluster_status('running', 'Failed to redeploy NFS Ganesha cluster') + # Checks if created export is listed + self._test_list_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._delete_export() + self._test_delete_cluster() + + def test_export_create_with_non_existing_fsname(self): + ''' + Test creating export with non-existing filesystem. + ''' + try: + fs_name = 'nfs-test' + self._test_create_cluster() + self._nfs_cmd('export', 'create', 'cephfs', + '--fsname', fs_name, '--cluster-id', self.cluster_id, + '--pseudo-path', self.pseudo_path) + self.fail(f"Export created with non-existing filesystem {fs_name}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + finally: + self._test_delete_cluster() + + def test_export_create_with_non_existing_clusterid(self): + ''' + Test creating cephfs export with non-existing nfs cluster. + ''' + try: + cluster_id = 'invalidtest' + self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name, + '--cluster-id', cluster_id, '--pseudo-path', self.pseudo_path) + self.fail(f"Export created with non-existing cluster id {cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + + def test_export_create_with_relative_pseudo_path_and_root_directory(self): + ''' + Test creating cephfs export with relative or '/' pseudo path. + ''' + def check_pseudo_path(pseudo_path): + try: + self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name, + '--cluster-id', self.cluster_id, + '--pseudo-path', pseudo_path) + self.fail(f"Export created for {pseudo_path}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + self._test_create_cluster() + self._cmd('fs', 'volume', 'create', self.fs_name) + check_pseudo_path('invalidpath') + check_pseudo_path('/') + check_pseudo_path('//') + self._cmd('fs', 'volume', 'rm', self.fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_write_to_read_only_export(self): + ''' + Test write to readonly export. 
+ ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly']) + port, ip = self._get_port_ip_info() + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed') + self._write_to_read_only_export(self.pseudo_path, port, ip) + self._test_delete_cluster() + + def test_cluster_info(self): + ''' + Test cluster info outputs correct ip and hostname + ''' + self._test_create_cluster() + info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id)) + print(f'info {info_output}') + info_ip = info_output[self.cluster_id].get('backend', [])[0].pop("ip") + host_details = { + self.cluster_id: { + 'backend': [ + { + "hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(), + "port": 2049 + } + ], + "virtual_ip": None, + } + } + host_ip = self._sys_cmd(['hostname', '-I']).decode("utf-8").split() + print(f'host_ip is {host_ip}, info_ip is {info_ip}') + self.assertDictEqual(info_output, host_details) + self.assertTrue(info_ip in host_ip) + self._test_delete_cluster() + + def test_cluster_set_reset_user_config(self): + ''' + Test cluster is created using user config and reverts back to default + config on reset. + ''' + self._test_create_cluster() + + pool = NFS_POOL_NAME + user_id = 'test' + fs_name = 'user_test_fs' + pseudo_path = '/ceph' + self._cmd('fs', 'volume', 'create', fs_name) + time.sleep(20) + key = self._cmd('auth', 'get-or-create-key', f'client.{user_id}', 'mon', + 'allow r', 'osd', + f'allow rw pool={pool} namespace={self.cluster_id}, allow rw tag cephfs data={fs_name}', + 'mds', f'allow rw path={self.path}').strip() + config = f""" LOG {{ + Default_log_level = FULL_DEBUG; + }} + + EXPORT {{ + Export_Id = 100; + Transports = TCP; + Path = /; + Pseudo = {pseudo_path}; + Protocols = 4; + Access_Type = RW; + Attr_Expiration_Time = 0; + Squash = None; + FSAL {{ + Name = CEPH; + Filesystem = {fs_name}; + User_Id = {user_id}; + Secret_Access_Key = '{key}'; + }} + }}""" + port, ip = self._get_port_ip_info() + self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster', 'config', + 'set', self.cluster_id, '-i', '-'], stdin=config) + time.sleep(30) + res = self._sys_cmd(['rados', '-p', pool, '-N', self.cluster_id, 'get', + f'userconf-nfs.{user_id}', '-']) + self.assertEqual(config, res.decode('utf-8')) + self._test_mnt(pseudo_path, port, ip) + self._nfs_cmd('cluster', 'config', 'reset', self.cluster_id) + rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls']) + if b'conf-nfs' not in rados_obj_ls and b'userconf-nfs' in rados_obj_ls: + self.fail("User config not deleted") + time.sleep(30) + self._test_mnt(pseudo_path, port, ip, False) + self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_cluster_set_user_config_with_non_existing_clusterid(self): + ''' + Test setting user config for non-existing nfs cluster. 
+ ''' + cluster_id = 'invalidtest' + with contextutil.safe_while(sleep=3, tries=3) as proceed: + while proceed(): + try: + self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster', + 'config', 'set', cluster_id, + '-i', '-'], stdin='testing') + self.fail(f"User config set for non-existing cluster" + f"{cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus == errno.ENOENT: + break + log.warning('exitstatus != ENOENT, retrying') + + def test_cluster_reset_user_config_with_non_existing_clusterid(self): + ''' + Test resetting user config for non-existing nfs cluster. + ''' + try: + cluster_id = 'invalidtest' + self._nfs_cmd('cluster', 'config', 'reset', cluster_id) + self.fail(f"User config reset for non-existing cluster {cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + + def test_create_export_via_apply(self): + ''' + Test creation of export via apply + ''' + self._test_create_cluster() + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps({ + "path": "/", + "pseudo": "/cephfs", + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "fs_name": self.fs_name + } + })) + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._check_nfs_cluster_status( + 'running', 'NFS Ganesha cluster not running after new export was applied') + self._test_delete_cluster() + + def test_update_export(self): + ''' + Test update of export's pseudo path and access type from rw to ro + ''' + self._create_default_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + export_block = self._get_export() + new_pseudo_path = '/testing' + export_block['pseudo'] = new_pseudo_path + export_block['access_type'] = 'RO' + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block)) + if not self._check_nfs_cluster_event('restart'): + self.fail("updating export's pseudo path should trigger restart of NFS service") + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster not running after restart') + self._write_to_read_only_export(new_pseudo_path, port, ip) + self._test_delete_cluster() + + def test_update_export_ro_to_rw(self): + ''' + Test update of export's access level from ro to rw + ''' + self._test_create_cluster() + self._create_export( + export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly']) + port, ip = self._get_port_ip_info() + self._write_to_read_only_export(self.pseudo_path, port, ip) + export_block = self._get_export() + export_block['access_type'] = 'RW' + self.ctx.cluster.run( + args=['ceph', 'nfs', 'export', 'apply', self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block)) + if self._check_nfs_cluster_event('restart'): + self.fail("update of export's access type should not trigger NFS service restart") + self._test_mnt(self.pseudo_path, port, ip) + self._test_delete_cluster() + + def test_update_export_with_invalid_values(self): + ''' + Test update of export with invalid values + ''' + self._create_default_export() + export_block = self._get_export() + + def update_with_invalid_values(key, value, fsal=False): + export_block_new = dict(export_block) + if fsal: + export_block_new['fsal'] = dict(export_block['fsal']) + export_block_new['fsal'][key] = value + else: + export_block_new[key] = value + 
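            # Annotation (not part of the original patch): dict() only makes a shallow
            # copy, so the nested 'fsal' dict is copied separately above before being
            # mutated; otherwise the invalid value would leak back into export_block,
            # which is reused by the later update_with_invalid_values() calls.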
try: + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block_new)) + except CommandFailedError: + pass + + update_with_invalid_values('export_id', 9) + update_with_invalid_values('cluster_id', 'testing_new') + update_with_invalid_values('pseudo', 'test_relpath') + update_with_invalid_values('access_type', 'W') + update_with_invalid_values('squash', 'no_squash') + update_with_invalid_values('security_label', 'invalid') + update_with_invalid_values('protocols', [2]) + update_with_invalid_values('transports', ['UD']) + update_with_invalid_values('name', 'RGW', True) + update_with_invalid_values('user_id', 'testing_export', True) + update_with_invalid_values('fs_name', 'b', True) + self._test_delete_cluster() + + def test_cmds_without_reqd_args(self): + ''' + Test that cmd fails on not passing required arguments + ''' + def exec_cmd_invalid(*cmd): + try: + self._nfs_cmd(*cmd) + self.fail(f"nfs {cmd} command executed successfully without required arguments") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + exec_cmd_invalid('cluster', 'create') + exec_cmd_invalid('cluster', 'delete') + exec_cmd_invalid('cluster', 'config', 'set') + exec_cmd_invalid('cluster', 'config', 'reset') + exec_cmd_invalid('export', 'create', 'cephfs') + exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid') + exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid', 'a_fs') + exec_cmd_invalid('export', 'ls') + exec_cmd_invalid('export', 'delete') + exec_cmd_invalid('export', 'delete', 'clusterid') + exec_cmd_invalid('export', 'info') + exec_cmd_invalid('export', 'info', 'clusterid') + exec_cmd_invalid('export', 'apply') + + def test_non_existent_cluster(self): + """ + Test that cluster info doesn't throw junk data for non-existent cluster + """ + cluster_ls = self._nfs_cmd('cluster', 'ls') + self.assertNotIn('foo', cluster_ls, 'cluster foo exists') + try: + self._nfs_cmd('cluster', 'info', 'foo') + self.fail("nfs cluster info foo returned successfully for non-existent cluster") + except CommandFailedError as e: + if e.exitstatus != errno.ENOENT: + raise + + def test_nfs_export_with_invalid_path(self): + """ + Test that nfs exports can't be created with invalid path + """ + mnt_pt = '/mnt' + preserve_mode = self._sys_cmd(['stat', '-c', '%a', mnt_pt]) + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self._create_export(export_id='123', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/non_existent_dir']) + except CommandFailedError as e: + if e.exitstatus != errno.ENOENT: + raise + self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) + + def test_nfs_export_creation_at_filepath(self): + """ + Test that nfs exports can't be created at a filepath + """ + mnt_pt = '/mnt' + preserve_mode = self._sys_cmd(['stat', '-c', '%a', mnt_pt]) + self._create_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['touch', f'{mnt_pt}/testfile']) + try: + self._create_export(export_id='123', extra_cmd=['--pseudo-path', + self.pseudo_path, + '--path', + '/testfile']) + except CommandFailedError as e: + if e.exitstatus != errno.ENOTDIR: + raise + self.ctx.cluster.run(args=['rm', '-rf', '/mnt/testfile']) + self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) + + def test_nfs_export_creation_at_symlink(self): + """ + Test that nfs exports can't be created at a symlink path + """ + mnt_pt = '/mnt' + preserve_mode = 
self._sys_cmd(['stat', '-c', '%a', mnt_pt]) + self._create_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir']) + self.ctx.cluster.run(args=['ln', '-s', f'{mnt_pt}/testdir', + f'{mnt_pt}/testdir_symlink']) + try: + self._create_export(export_id='123', + extra_cmd=['--pseudo-path', + self.pseudo_path, + '--path', + '/testdir_symlink']) + except CommandFailedError as e: + if e.exitstatus != errno.ENOTDIR: + raise + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}/*']) + self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) diff --git a/qa/tasks/cephfs/test_openfiletable.py b/qa/tasks/cephfs/test_openfiletable.py new file mode 100644 index 000000000..eff6b5093 --- /dev/null +++ b/qa/tasks/cephfs/test_openfiletable.py @@ -0,0 +1,85 @@ +import time +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class OpenFileTable(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def _check_oft_counter(self, name, count): + perf_dump = self.fs.mds_asok(['perf', 'dump']) + if perf_dump['oft'][name] == count: + return True + return False + + def test_max_items_per_obj(self): + """ + The maximum number of openfiles omap objects keys are now equal to + osd_deep_scrub_large_omap_object_key_threshold option. + """ + self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "5") + + self.fs.mds_restart() + self.fs.wait_for_daemons() + + # Write some bytes to a file + size_mb = 1 + + # Hold the file open + file_count = 8 + for i in range(0, file_count): + filename = "open_file{}".format(i) + p = self.mount_a.open_background(filename) + self.mount_a.write_n_mb(filename, size_mb) + + time.sleep(10) + + """ + With osd_deep_scrub_large_omap_object_key_threshold value as 5 and + opening 8 files we should have a new rados object with name + mds0_openfiles.1 to hold the extra keys. + """ + + self.fs.radosm(["stat", "mds0_openfiles.1"]) + + # Now close the file + self.mount_a.kill_background(p) + + def test_perf_counters(self): + """ + Opening a file should increment omap_total_updates by 1. 
+ """ + + self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "1") + self.fs.mds_restart() + self.fs.wait_for_daemons() + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_updates_0 = perf_dump['oft']['omap_total_updates'] + log.info("omap_total_updates_0:{}".format(omap_total_updates_0)) + + # Open the file + p = self.mount_a.open_background("omap_counter_test_file") + self.wait_until_true(lambda: self._check_oft_counter('omap_total_updates', 2), timeout=120) + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_updates_1 = perf_dump['oft']['omap_total_updates'] + log.info("omap_total_updates_1:{}".format(omap_total_updates_1)) + + self.assertTrue((omap_total_updates_1 - omap_total_updates_0) == 2) + + # Now close the file + self.mount_a.kill_background(p) + # Ensure that the file does not exist any more + self.wait_until_true(lambda: self._check_oft_counter('omap_total_removes', 1), timeout=120) + self.wait_until_true(lambda: self._check_oft_counter('omap_total_kv_pairs', 1), timeout=120) + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_removes = perf_dump['oft']['omap_total_removes'] + omap_total_kv_pairs = perf_dump['oft']['omap_total_kv_pairs'] + log.info("omap_total_removes:{}".format(omap_total_removes)) + log.info("omap_total_kv_pairs:{}".format(omap_total_kv_pairs)) + self.assertTrue(omap_total_removes == 1) + self.assertTrue(omap_total_kv_pairs == 1) diff --git a/qa/tasks/cephfs/test_pool_perm.py b/qa/tasks/cephfs/test_pool_perm.py new file mode 100644 index 000000000..9912debed --- /dev/null +++ b/qa/tasks/cephfs/test_pool_perm.py @@ -0,0 +1,109 @@ +from textwrap import dedent +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import os + + +class TestPoolPerm(CephFSTestCase): + def test_pool_perm(self): + self.mount_a.run_shell(["touch", "test_file"]) + + file_path = os.path.join(self.mount_a.mountpoint, "test_file") + + remote_script = dedent(""" + import os + import errno + + fd = os.open("{path}", os.O_RDWR) + try: + if {check_read}: + ret = os.read(fd, 1024) + else: + os.write(fd, b'content') + except OSError as e: + if e.errno != errno.EPERM: + raise + else: + raise RuntimeError("client does not check permission of data pool") + """) + + client_name = "client.{0}".format(self.mount_a.client_id) + + # set data pool read only + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow r pool={0}'.format(self.fs.get_data_pool_name())) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + # write should fail + self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False))) + + # set data pool write only + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow w pool={0}'.format(self.fs.get_data_pool_name())) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + # read should fail + self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(True))) + + def test_forbidden_modification(self): + """ + That a client who does not have the capability for setting + layout pools is prevented from doing so. 
+ """ + + # Set up + client_name = "client.{0}".format(self.mount_a.client_id) + new_pool_name = "data_new" + self.fs.add_data_pool(new_pool_name) + + self.mount_a.run_shell(["touch", "layoutfile"]) + self.mount_a.run_shell(["mkdir", "layoutdir"]) + + # Set MDS 'rw' perms: missing 'p' means no setting pool layouts + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r', + 'osd', + 'allow rw pool={0},allow rw pool={1}'.format( + self.fs.get_data_pool_names()[0], + self.fs.get_data_pool_names()[1], + )) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + with self.assertRaises(CommandFailedError): + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) + with self.assertRaises(CommandFailedError): + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) + self.mount_a.umount_wait() + + # Set MDS 'rwp' perms: should now be able to set layouts + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r', + 'osd', + 'allow rw pool={0},allow rw pool={1}'.format( + self.fs.get_data_pool_names()[0], + self.fs.get_data_pool_names()[1], + )) + self.mount_a.mount_wait() + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) + self.mount_a.umount_wait() + + def tearDown(self): + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_a.client_id), + 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0])) + super(TestPoolPerm, self).tearDown() + diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py new file mode 100644 index 000000000..0386672bd --- /dev/null +++ b/qa/tasks/cephfs/test_quota.py @@ -0,0 +1,106 @@ + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +from teuthology.exceptions import CommandFailedError + +class TestQuota(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 1 + + def test_remote_update_getfattr(self): + """ + That quota changes made from one client are visible to another + client looking at ceph.quota xattrs + """ + self.mount_a.run_shell(["mkdir", "subdir"]) + + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + None) + self.assertEqual( + self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + None) + + self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10") + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + "10") + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + "10", timeout=10) + + def test_remote_update_df(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the change + reflected in their statfs output. 
+ """ + + self.mount_b.umount_wait() + + self.mount_a.run_shell(["mkdir", "subdir"]) + + size_before = 1024 * 1024 * 128 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_before) + + self.mount_b.mount_wait(cephfs_mntpt="/subdir") + + self.assertDictEqual( + self.mount_b.df(), + { + "total": size_before, + "used": 0, + "available": size_before + }) + + size_after = 1024 * 1024 * 256 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_after) + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.df(), + { + "total": size_after, + "used": 0, + "available": size_after + }, + timeout=10 + ) + + def test_remote_update_write(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the effect + of the change when writing data. + """ + + self.mount_a.run_shell(["mkdir", "subdir_files"]) + self.mount_a.run_shell(["mkdir", "subdir_data"]) + + # Set some nice high quotas that mount_b's initial operations + # will be well within + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600") + + # Do some writes within my quota + self.mount_b.create_n_files("subdir_files/file", 20) + self.mount_b.write_n_mb("subdir_data/file", 20) + + # Set quotas lower than what mount_b already wrote, it should + # refuse to write more once it's seen them + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576") + + # Do some writes that would have been okay within the old quota, + # but are forbidden under the new quota + with self.assertRaises(CommandFailedError): + self.mount_b.create_n_files("subdir_files/file", 40) + with self.assertRaises(CommandFailedError): + self.mount_b.write_n_mb("subdir_data/file", 40) + diff --git a/qa/tasks/cephfs/test_readahead.py b/qa/tasks/cephfs/test_readahead.py new file mode 100644 index 000000000..7e6270f03 --- /dev/null +++ b/qa/tasks/cephfs/test_readahead.py @@ -0,0 +1,26 @@ +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +class TestReadahead(CephFSTestCase): + def test_flush(self): + # Create 32MB file + self.mount_a.run_shell(["dd", "if=/dev/urandom", "of=foo", "bs=1M", "count=32"]) + + # Unmount and remount the client to flush cache + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + initial_op_read = self.mount_a.get_op_read_count() + self.mount_a.run_shell(["dd", "if=foo", "of=/dev/null", "bs=128k", "count=32"]) + op_read = self.mount_a.get_op_read_count() + self.assertGreaterEqual(op_read, initial_op_read) + op_read -= initial_op_read + log.info("read operations: {0}".format(op_read)) + + # with exponentially increasing readahead, we should see fewer than 10 operations + # but this test simply checks if the client is doing a remote read for each local read + if op_read >= 32: + raise RuntimeError("readahead not working") diff --git a/qa/tasks/cephfs/test_recovery_fs.py b/qa/tasks/cephfs/test_recovery_fs.py new file mode 100644 index 000000000..bbcdf9769 --- /dev/null +++ b/qa/tasks/cephfs/test_recovery_fs.py @@ -0,0 +1,38 @@ +import logging +from os.path import join as os_path_join + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class 
TestFSRecovery(CephFSTestCase): + """ + Tests for recovering FS after loss of FSMap + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 3 + + def test_recover_fs_after_fsmap_removal(self): + data_pool = self.fs.get_data_pool_name() + metadata_pool = self.fs.get_metadata_pool_name() + # write data in mount, and fsync + self.mount_a.create_n_files('file_on_fs', 1, sync=True) + # faild MDSs to allow removing the file system in the next step + self.fs.fail() + # Remove file system to lose FSMap and keep the pools intact. + # This mimics the scenario where the monitor store is rebuilt + # using OSDs to recover a cluster with corrupt monitor store. + # The FSMap is permanently lost, but the FS pools are + # recovered/intact + self.fs.rm() + # Recreate file system with pool and previous fscid + self.fs.mon_manager.raw_cluster_cmd( + 'fs', 'new', self.fs.name, metadata_pool, data_pool, + '--recover', '--force', '--fscid', f'{self.fs.id}') + self.fs.set_joinable() + # Check status of file system + self.fs.wait_for_daemons() + # check data in file sytem is intact + filepath = os_path_join(self.mount_a.hostfs_mntpt, 'file_on_fs_0') + self.assertEqual(self.mount_a.read_file(filepath), "0") diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py new file mode 100644 index 000000000..8c4e1967d --- /dev/null +++ b/qa/tasks/cephfs/test_recovery_pool.py @@ -0,0 +1,179 @@ +""" +Test our tools for recovering metadata from the data pool into an alternate pool +""" + +import logging +import traceback +from collections import namedtuple + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class OverlayWorkload(object): + def __init__(self): + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def assert_equal(self, a, b): + try: + if a != b: + raise AssertionError("{0} != {1}".format(a, b)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self, fs): + """ + Damage the filesystem pools in ways that will be interesting to recover from. 
By + default just wipe everything in the metadata pool + """ + + pool = fs.get_metadata_pool_name() + fs.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self, fs): + """ + Called after client unmount, after write: flush whatever you want + """ + fs.rank_asok(["flush", "journal"]) + + +class SimpleOverlayWorkload(OverlayWorkload): + """ + Single file, single directory, check that it gets recovered and so does its size + """ + def write(self, mount): + mount.run_shell(["mkdir", "subdir"]) + mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = mount.stat("subdir/sixmegs") + + def validate(self, recovery_mount): + recovery_mount.run_shell(["ls", "subdir"]) + st = recovery_mount.stat("subdir/sixmegs") + self.assert_equal(st['st_size'], self._initial_state['st_size']) + return self._errors + +class TestRecoveryPool(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 1 + REQUIRE_RECOVERY_FILESYSTEM = True + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + def _rebuild_metadata(self, workload, other_pool=None, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. + """ + + # First, inject some files + + workload.write(self.mount_a) + + # Unmount the client and flush the journal: the tool should also cope with + # situations where there is dirty metadata, but we'll test that separately + self.mount_a.umount_wait() + workload.flush(self.fs) + self.fs.fail() + + # After recovery, we need the MDS to not be strict about stats (in production these options + # are off by default, but in QA we need to explicitly disable them) + # Note: these have to be written to ceph.conf to override existing ceph.conf values. 
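        # Illustrative sketch, not part of the original patch: after the two
        # set_ceph_conf() calls below, the teuthology-managed ceph.conf should end up
        # with roughly the following [mds] entries, which the restarted MDS then reads
        # at startup:
        #
        #     [mds]
        #     mds verify scatter = false
        #     mds debug scatterstat = false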
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + self.fs.mds_restart() + + # Apply any data damage the workload wants + workload.damage(self.fs) + + # Create the alternate pool if requested + recovery_fs = self.mds_cluster.newfs(name="recovery_fs", create=False) + recovery_fs.set_data_pool_name(self.fs.get_data_pool_name()) + recovery_fs.create(recover=True, metadata_overlay=True) + + recovery_pool = recovery_fs.get_metadata_pool_name() + recovery_fs.mon_manager.raw_cluster_cmd('-s') + + # Reset the MDS map in case multiple ranks were in play: recovery procedure + # only understands how to rebuild metadata under rank 0 + #self.fs.reset() + #self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) + #self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) + #self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) + + # Run the recovery procedure + recovery_fs.data_scan(['init', '--force-init', + '--filesystem', recovery_fs.name, + '--alternate-pool', recovery_pool]) + recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "session"]) + recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "snap"]) + recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "inode"]) + if False: + with self.assertRaises(CommandFailedError): + # Normal reset should fail when no objects are present, we'll use --force instead + self.fs.journal_tool(["journal", "reset"], 0) + + recovery_fs.data_scan(['scan_extents', '--alternate-pool', + recovery_pool, '--filesystem', self.fs.name, + self.fs.get_data_pool_name()]) + recovery_fs.data_scan(['scan_inodes', '--alternate-pool', + recovery_pool, '--filesystem', self.fs.name, + '--force-corrupt', '--force-init', + self.fs.get_data_pool_name()]) + recovery_fs.data_scan(['scan_links', '--filesystem', recovery_fs.name]) + recovery_fs.journal_tool(['event', 'recover_dentries', 'list', + '--alternate-pool', recovery_pool], 0) + recovery_fs.journal_tool(["journal", "reset", "--force"], 0) + + # Start the MDS + recovery_fs.set_joinable() + status = recovery_fs.wait_for_daemons() + + self.config_set('mds', 'debug_mds', '20') + for rank in recovery_fs.get_ranks(status=status): + recovery_fs.rank_tell(['scrub', 'start', '/', 'force,recursive,repair'], rank=rank['rank'], status=status) + log.info(str(recovery_fs.status())) + + # Mount a client + self.mount_a.mount_wait(cephfs_name=recovery_fs.name) + + # See that the files are present and correct + errors = workload.validate(self.mount_a) + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def test_rebuild_simple(self): + self._rebuild_metadata(SimpleOverlayWorkload()) diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py new file mode 100644 index 000000000..647860129 --- /dev/null +++ b/qa/tasks/cephfs/test_scrub.py @@ -0,0 +1,187 @@ +""" +Test CephFS scrub (distinct from OSD scrub) functionality +""" + +from io import BytesIO +import logging +from collections import namedtuple + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class Workload(CephFSTestCase): + def __init__(self, test, filesystem, mount): + super().__init__() + self._test = test + self._mount = mount + 
self._filesystem = filesystem + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self): + """ + Damage the filesystem pools in ways that will be interesting to recover from. By + default just wipe everything in the metadata pool + """ + # Delete every object in the metadata pool + pool = self._filesystem.get_metadata_pool_name() + self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self): + """ + Called after client unmount, after write: flush whatever you want + """ + self._filesystem.mds_asok(["flush", "journal"]) + + +class BacktraceWorkload(Workload): + """ + Single file, single directory, wipe the backtrace and check it. + """ + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + + def validate(self): + st = self._mount.stat("subdir/sixmegs") + self._filesystem.mds_asok(["flush", "journal"]) + bt = self._filesystem.read_backtrace(st['st_ino']) + parent = bt['ancestors'][0]['dname'] + self.assertEqual(parent, 'sixmegs') + return self._errors + + def damage(self): + st = self._mount.stat("subdir/sixmegs") + self._filesystem.mds_asok(["flush", "journal"]) + self._filesystem._write_data_xattr(st['st_ino'], "parent", "") + + def create_files(self, nfiles=1000): + self._mount.create_n_files("scrub-new-files/file", nfiles) + + +class DupInodeWorkload(Workload): + """ + Duplicate an inode and try scrubbing it twice." + """ + + def write(self): + self._mount.run_shell(["mkdir", "parent"]) + self._mount.run_shell(["mkdir", "parent/child"]) + self._mount.write_n_mb("parent/parentfile", 6) + self._mount.write_n_mb("parent/child/childfile", 6) + + def damage(self): + self._mount.umount_wait() + self._filesystem.mds_asok(["flush", "journal"]) + self._filesystem.fail() + d = self._filesystem.radosmo(["getomapval", "10000000000.00000000", "parentfile_head", "-"]) + self._filesystem.radosm(["setomapval", "10000000000.00000000", "shadow_head"], stdin=BytesIO(d)) + self._test.config_set('mds', 'mds_hack_allow_loading_invalid_metadata', True) + self._filesystem.set_joinable() + self._filesystem.wait_for_daemons() + + def validate(self): + out_json = self._filesystem.run_scrub(["start", "/", "recursive,repair"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + self.assertTrue(self._filesystem.are_daemons_healthy()) + return self._errors + + +class TestScrub(CephFSTestCase): + MDSS_REQUIRED = 1 + + def setUp(self): + super().setUp() + + def _scrub(self, workload, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. 
+ """ + + # First, inject some files + + workload.write() + + # are off by default, but in QA we need to explicitly disable them) + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + + # Apply any data damage the workload wants + workload.damage() + + out_json = self.fs.run_scrub(["start", "/", "recursive,repair"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # See that the files are present and correct + errors = workload.validate() + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def _get_damage_count(self, damage_type='backtrace'): + out_json = self.fs.rank_tell(["damage", "ls"]) + self.assertNotEqual(out_json, None) + + damage_count = 0 + for it in out_json: + if it['damage_type'] == damage_type: + damage_count += 1 + return damage_count + + def _scrub_new_files(self, workload): + """ + That scrubbing new files does not lead to errors + """ + workload.create_files(1000) + self.fs.wait_until_scrub_complete() + self.assertEqual(self._get_damage_count(), 0) + + def test_scrub_backtrace_for_new_files(self): + self._scrub_new_files(BacktraceWorkload(self, self.fs, self.mount_a)) + + def test_scrub_backtrace(self): + self._scrub(BacktraceWorkload(self, self.fs, self.mount_a)) + + def test_scrub_dup_inode(self): + self._scrub(DupInodeWorkload(self, self.fs, self.mount_a)) + + def test_mdsdir_scrub_backtrace(self): + damage_count = self._get_damage_count() + self.assertNotIn("MDS_DAMAGE", self.mds_cluster.mon_manager.get_mon_health()['checks']) + + out_json = self.fs.run_scrub(["start", "~mdsdir", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + self.assertEqual(self._get_damage_count(), damage_count) + self.assertNotIn("MDS_DAMAGE", self.mds_cluster.mon_manager.get_mon_health()['checks']) diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py new file mode 100644 index 000000000..e41b997a6 --- /dev/null +++ b/qa/tasks/cephfs/test_scrub_checks.py @@ -0,0 +1,462 @@ +""" +MDS admin socket scrubbing-related tests. +""" +import json +import logging +import errno +import time +from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while +import os +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestScrubControls(CephFSTestCase): + """ + Test basic scrub control operations such as abort, pause and resume. 
+ """ + + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 1 + + def _abort_scrub(self, expected): + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], expected) + def _pause_scrub(self, expected): + res = self.fs.run_scrub(["pause"]) + self.assertEqual(res['return_code'], expected) + def _resume_scrub(self, expected): + res = self.fs.run_scrub(["resume"]) + self.assertEqual(res['return_code'], expected) + def _check_task_status(self, expected_status, timo=120): + """ check scrub status for current active mds in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + try: + if task_status[active[0]].startswith(expected_status): + return True + except KeyError: + pass + + def _check_task_status_na(self, timo=120): + """ check absence of scrub status in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + if not active[0] in task_status: + return True + + def create_scrub_data(self, test_dir): + for i in range(32): + dirname = "dir.{0}".format(i) + dirpath = os.path.join(test_dir, dirname) + self.mount_a.run_shell_payload(f""" +set -e +mkdir -p {dirpath} +for ((i = 0; i < 32; i++)); do + dd if=/dev/urandom of={dirpath}/filename.$i bs=1M conv=fdatasync count=1 +done +""") + + def test_scrub_abort(self): + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # abort and verify + self._abort_scrub(0) + self.fs.wait_until_scrub_complete(sleep=5, timeout=30) + + # sleep enough to fetch updated task status + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume(self): + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + log.info("mountpoint: {0}".format(self.mount_a.mountpoint)) + client_path = os.path.join(self.mount_a.mountpoint, test_dir) + log.info("client_path: {0}".format(client_path)) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # pause and verify + self._pause_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertTrue("PAUSED" in out_json['status']) + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + self._resume_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertFalse("PAUSED" in out_json['status']) + + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume_with_abort(self): + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # pause and verify + self._pause_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertTrue("PAUSED" in out_json['status']) + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # abort and verify + self._abort_scrub(0) + out_json = self.fs.get_scrub_status() + 
self.assertTrue("PAUSED" in out_json['status']) + self.assertTrue("0 inodes" in out_json['status']) + + # scrub status should still be paused... + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + self._resume_scrub(0) + self.assertTrue(self.fs.wait_until_scrub_complete(sleep=5, timeout=30)) + + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_task_status_on_mds_failover(self): + (original_active, ) = self.fs.get_active_names() + original_standbys = self.mds_cluster.get_standby_daemons() + + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # pause and verify + self._pause_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertTrue("PAUSED" in out_json['status']) + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # Kill the rank 0 + self.fs.mds_stop(original_active) + + def promoted(): + active = self.fs.get_active_names() + return active and active[0] in original_standbys + + log.info("Waiting for promotion of one of the original standbys {0}".format( + original_standbys)) + self.wait_until_true(promoted, timeout=self.fs.beacon_timeout) + + self._check_task_status_na() + +class TestScrubChecks(CephFSTestCase): + """ + Run flush and scrub commands on the specified files in the filesystem. This + task will run through a sequence of operations, but it is not comprehensive + on its own -- it doesn't manipulate the mds cache state to test on both + in- and out-of-memory parts of the hierarchy. So it's designed to be run + multiple times within a single test run, so that the test can manipulate + memory state. + + Usage: + mds_scrub_checks: + mds_rank: 0 + path: path/to/test/dir + client: 0 + run_seq: [0-9]+ + + Increment the run_seq on subsequent invocations within a single test run; + it uses that value to generate unique folder and file names. 
+ """ + + MDSS_REQUIRED = 1 + CLIENTS_REQUIRED = 1 + + def test_scrub_checks(self): + self._checks(0) + self._checks(1) + + def _checks(self, run_seq): + mds_rank = 0 + test_dir = "scrub_test_path" + + abs_test_path = "/{0}".format(test_dir) + + log.info("mountpoint: {0}".format(self.mount_a.mountpoint)) + client_path = os.path.join(self.mount_a.mountpoint, test_dir) + log.info("client_path: {0}".format(client_path)) + + log.info("Cloning repo into place") + repo_path = TestScrubChecks.clone_repo(self.mount_a, client_path) + + log.info("Initiating mds_scrub_checks on mds.{id_} test_path {path}, run_seq {seq}".format( + id_=mds_rank, path=abs_test_path, seq=run_seq) + ) + + + success_validator = lambda j, r: self.json_validator(j, r, "return_code", 0) + + nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path) + self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep), + lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) + self.tell_command(mds_rank, "scrub start {nep}".format(nep=nep), + lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) + + test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path) + dirpath = "{repo_path}/suites".format(repo_path=test_repo_path) + + if run_seq == 0: + log.info("First run: flushing {dirpath}".format(dirpath=dirpath)) + command = "flush_path {dirpath}".format(dirpath=dirpath) + self.asok_command(mds_rank, command, success_validator) + command = "scrub start {dirpath}".format(dirpath=dirpath) + self.tell_command(mds_rank, command, success_validator) + + filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format( + repo_path=test_repo_path) + if run_seq == 0: + log.info("First run: flushing {filepath}".format(filepath=filepath)) + command = "flush_path {filepath}".format(filepath=filepath) + self.asok_command(mds_rank, command, success_validator) + command = "scrub start {filepath}".format(filepath=filepath) + self.tell_command(mds_rank, command, success_validator) + + if run_seq == 0: + log.info("First run: flushing base dir /") + command = "flush_path /" + self.asok_command(mds_rank, command, success_validator) + command = "scrub start /" + self.tell_command(mds_rank, command, success_validator) + + new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq) + test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path, + i=run_seq) + self.mount_a.run_shell(["mkdir", new_dir]) + command = "flush_path {dir}".format(dir=test_new_dir) + self.asok_command(mds_rank, command, success_validator) + + new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path, + i=run_seq) + test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path, + i=run_seq) + self.mount_a.write_n_mb(new_file, 1) + + command = "flush_path {file}".format(file=test_new_file) + self.asok_command(mds_rank, command, success_validator) + + # check that scrub fails on errors + ino = self.mount_a.path_to_ino(new_file) + rados_obj_name = "{ino:x}.00000000".format(ino=ino) + command = "scrub start {file}".format(file=test_new_file) + + def _check_and_clear_damage(ino, dtype): + all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) + damage = [d for d in all_damage if d['ino'] == ino and d['damage_type'] == dtype] + for d in damage: + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[mds_rank]), + "damage", "rm", str(d['id'])) + return len(damage) > 0 + + # Missing parent xattr + self.assertFalse(_check_and_clear_damage(ino, 
"backtrace")); + self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name()) + self.tell_command(mds_rank, command, success_validator) + self.fs.wait_until_scrub_complete(sleep=5, timeout=30) + self.assertTrue(_check_and_clear_damage(ino, "backtrace")); + + command = "flush_path /" + self.asok_command(mds_rank, command, success_validator) + + def scrub_with_stray_evaluation(self, fs, mnt, path, flag, files=2000, + _hard_links=3): + fs.set_allow_new_snaps(True) + + test_dir = "stray_eval_dir" + mnt.run_shell(["mkdir", test_dir]) + client_path = os.path.join(mnt.mountpoint, test_dir) + mnt.create_n_files(fs_path=f"{test_dir}/file", count=files, + hard_links=_hard_links) + mnt.run_shell(["mkdir", f"{client_path}/.snap/snap1-{test_dir}"]) + mnt.run_shell(f"find {client_path}/ -type f -delete") + mnt.run_shell(["rmdir", f"{client_path}/.snap/snap1-{test_dir}"]) + perf_dump = fs.rank_tell(["perf", "dump"], 0) + self.assertNotEqual(perf_dump.get('mds_cache').get('num_strays'), + 0, "mdcache.num_strays is zero") + + log.info( + f"num of strays: {perf_dump.get('mds_cache').get('num_strays')}") + + out_json = fs.run_scrub(["start", path, flag]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + + self.assertEqual( + fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + perf_dump = fs.rank_tell(["perf", "dump"], 0) + self.assertEqual(int(perf_dump.get('mds_cache').get('num_strays')), + 0, "mdcache.num_strays is non-zero") + + def test_scrub_repair(self): + mds_rank = 0 + test_dir = "scrub_repair_path" + + self.mount_a.run_shell(["mkdir", test_dir]) + self.mount_a.run_shell(["touch", "{0}/file".format(test_dir)]) + dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino(test_dir)) + + self.mount_a.umount_wait() + + # flush journal entries to dirfrag objects, and expire journal + self.fs.mds_asok(['flush', 'journal']) + self.fs.mds_stop() + + # remove the dentry from dirfrag, cause incorrect fragstat/rstat + self.fs.radosm(["rmomapkey", dir_objname, "file_head"]) + + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # fragstat indicates the directory is not empty, rmdir should fail + with self.assertRaises(CommandFailedError) as ar: + self.mount_a.run_shell(["rmdir", test_dir]) + self.assertEqual(ar.exception.exitstatus, 1) + + self.tell_command(mds_rank, "scrub start /{0} repair".format(test_dir), + lambda j, r: self.json_validator(j, r, "return_code", 0)) + + # wait a few second for background repair + time.sleep(10) + + # fragstat should be fixed + self.mount_a.run_shell(["rmdir", test_dir]) + + def test_stray_evaluation_with_scrub(self): + """ + test that scrub can iterate over ~mdsdir and evaluate strays + """ + self.scrub_with_stray_evaluation(self.fs, self.mount_a, "~mdsdir", + "recursive") + + def test_flag_scrub_mdsdir(self): + """ + test flag scrub_mdsdir + """ + self.scrub_with_stray_evaluation(self.fs, self.mount_a, "/", + "recursive,scrub_mdsdir") + + @staticmethod + def json_validator(json_out, rc, element, expected_value): + if rc != 0: + return False, "asok command returned error {rc}".format(rc=rc) + element_value = json_out.get(element) + if element_value != expected_value: + return False, "unexpectedly got {jv} instead of {ev}!".format( + jv=element_value, ev=expected_value) + return True, "Succeeded" + + def tell_command(self, mds_rank, command, validator): + log.info("Running command '{command}'".format(command=command)) + + command_list = command.split() + 
jout = self.fs.rank_tell(command_list, mds_rank) + + log.info("command '{command}' returned '{jout}'".format( + command=command, jout=jout)) + + success, errstring = validator(jout, 0) + if not success: + raise AsokCommandFailedError(command, 0, jout, errstring) + return jout + + def asok_command(self, mds_rank, command, validator): + log.info("Running command '{command}'".format(command=command)) + + command_list = command.split() + + # we just assume there's an active mds for every rank + mds_id = self.fs.get_active_names()[mds_rank] + proc = self.fs.mon_manager.admin_socket('mds', mds_id, + command_list, check_status=False) + rout = proc.exitstatus + sout = proc.stdout.getvalue() + + if sout.strip(): + jout = json.loads(sout) + else: + jout = None + + log.info("command '{command}' got response code '{rout}' and stdout '{sout}'".format( + command=command, rout=rout, sout=sout)) + + success, errstring = validator(jout, rout) + + if not success: + raise AsokCommandFailedError(command, rout, jout, errstring) + + return jout + + @staticmethod + def clone_repo(client_mount, path): + repo = "ceph-qa-suite" + repo_path = os.path.join(path, repo) + client_mount.run_shell(["mkdir", "-p", path]) + + try: + client_mount.stat(repo_path) + except CommandFailedError: + client_mount.run_shell([ + "git", "clone", '--branch', 'giant', + "http://github.com/ceph/{repo}".format(repo=repo), + "{path}/{repo}".format(path=path, repo=repo) + ]) + + return repo_path + + +class AsokCommandFailedError(Exception): + """ + Exception thrown when we get an unexpected response + on an admin socket command + """ + + def __init__(self, command, rc, json_out, errstring): + self.command = command + self.rc = rc + self.json = json_out + self.errstring = errstring + + def __str__(self): + return "Admin socket: {command} failed with rc={rc} json output={json}, because '{es}'".format( + command=self.command, rc=self.rc, json=self.json, es=self.errstring) diff --git a/qa/tasks/cephfs/test_sessionmap.py b/qa/tasks/cephfs/test_sessionmap.py new file mode 100644 index 000000000..ad6fd1d60 --- /dev/null +++ b/qa/tasks/cephfs/test_sessionmap.py @@ -0,0 +1,232 @@ +import time +import json +import logging + +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +class TestSessionMap(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 2 + + def test_tell_session_drop(self): + """ + That when a `tell` command is sent using the python CLI, + its MDS session is gone after it terminates + """ + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + status = self.fs.status() + self.fs.rank_tell(["session", "ls"], status=status) + + ls_data = self.fs.rank_asok(['session', 'ls'], status=status) + self.assertEqual(len(ls_data), 0) + + def _get_connection_count(self, status=None): + perf = self.fs.rank_asok(["perf", "dump"], status=status) + conn = 0 + for module, dump in perf.items(): + if "AsyncMessenger::Worker" in module: + conn += dump['msgr_active_connections'] + return conn + + def test_tell_conn_close(self): + """ + That when a `tell` command is sent using the python CLI, + the conn count goes back to where it started (i.e. 
we aren't + leaving connections open) + """ + self.config_set('mds', 'ms_async_reap_threshold', '1') + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + status = self.fs.status() + s = self._get_connection_count(status=status) + self.fs.rank_tell(["session", "ls"], status=status) + self.wait_until_true( + lambda: self._get_connection_count(status=status) == s, + timeout=30 + ) + + def test_mount_conn_close(self): + """ + That when a client unmounts, the thread count on the MDS goes back + to what it was before the client mounted + """ + self.config_set('mds', 'ms_async_reap_threshold', '1') + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + status = self.fs.status() + s = self._get_connection_count(status=status) + self.mount_a.mount_wait() + self.assertGreater(self._get_connection_count(status=status), s) + self.mount_a.umount_wait() + self.wait_until_true( + lambda: self._get_connection_count(status=status) == s, + timeout=30 + ) + + def test_version_splitting(self): + """ + That when many sessions are updated, they are correctly + split into multiple versions to obey mds_sessionmap_keys_per_op + """ + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # Configure MDS to write one OMAP key at once + self.set_conf('mds', 'mds_sessionmap_keys_per_op', 1) + self.fs.mds_fail_restart() + status = self.fs.wait_for_daemons() + + # Bring the clients back + self.mount_a.mount_wait() + self.mount_b.mount_wait() + + # See that they've got sessions + self.assert_session_count(2, mds_id=self.fs.get_rank(status=status)['name']) + + # See that we persist their sessions + self.fs.rank_asok(["flush", "journal"], rank=0, status=status) + table_json = json.loads(self.fs.table_tool(["0", "show", "session"])) + log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2))) + self.assertEqual(table_json['0']['result'], 0) + self.assertEqual(len(table_json['0']['data']['sessions']), 2) + + # Now, induce a "force_open_sessions" event by exporting a dir + self.mount_a.run_shell(["mkdir", "bravo"]) + self.mount_a.run_shell(["touch", "bravo/file_a"]) + self.mount_b.run_shell(["touch", "bravo/file_b"]) + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + def get_omap_wrs(): + return self.fs.rank_asok(['perf', 'dump', 'objecter'], rank=1, status=status)['objecter']['omap_wr'] + + # Flush so that there are no dirty sessions on rank 1 + self.fs.rank_asok(["flush", "journal"], rank=1, status=status) + + # Export so that we get a force_open to rank 1 for the two sessions from rank 0 + initial_omap_wrs = get_omap_wrs() + self.fs.rank_asok(['export', 'dir', '/bravo', '1'], rank=0, status=status) + + # This is the critical (if rather subtle) check: that in the process of doing an export dir, + # we hit force_open_sessions, and as a result we end up writing out the sessionmap. There + # will be two sessions dirtied here, and because we have set keys_per_op to 1, we should see + # a single session get written out (the first of the two, triggered by the second getting marked + # dirty) + # The number of writes is two per session, because the header (sessionmap version) update and + # KV write both count. Also, multiply by 2 for each openfile table update. 
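        # Worked arithmetic (annotation, not in the original patch) behind the delta
        # asserted below: 1 session written out x 2 omap writes (sessionmap header +
        # KV), doubled again for the matching openfile table update, i.e. 2*2 = 4.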
+ self.wait_until_true( + lambda: get_omap_wrs() - initial_omap_wrs == 2*2, + timeout=30 # Long enough for an export to get acked + ) + + # Now end our sessions and check the backing sessionmap is updated correctly + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # In-memory sessionmap check + self.assert_session_count(0, mds_id=self.fs.get_rank(status=status)['name']) + + # On-disk sessionmap check + self.fs.rank_asok(["flush", "journal"], rank=0, status=status) + table_json = json.loads(self.fs.table_tool(["0", "show", "session"])) + log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2))) + self.assertEqual(table_json['0']['result'], 0) + self.assertEqual(len(table_json['0']['data']['sessions']), 0) + + def _configure_auth(self, mount, id_name, mds_caps, osd_caps=None, mon_caps=None): + """ + Set up auth credentials for a client mount, and write out the keyring + for the client to use. + """ + + if osd_caps is None: + osd_caps = "allow rw" + + if mon_caps is None: + mon_caps = "allow r" + + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.{name}".format(name=id_name), + "mds", mds_caps, + "osd", osd_caps, + "mon", mon_caps + ) + mount.client_id = id_name + mount.client_remote.write_file(mount.get_keyring_path(), out, sudo=True) + self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path()) + + def test_session_reject(self): + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to inject client metadata") + + self.mount_a.run_shell(["mkdir", "foo"]) + self.mount_a.run_shell(["mkdir", "foo/bar"]) + self.mount_a.umount_wait() + + # Mount B will be my rejected client + self.mount_b.umount_wait() + + # Configure a client that is limited to /foo/bar + self._configure_auth(self.mount_b, "badguy", "allow rw path=/foo/bar") + # Check he can mount that dir and do IO + self.mount_b.mount_wait(cephfs_mntpt="/foo/bar") + self.mount_b.create_destroy() + self.mount_b.umount_wait() + + # Configure the client to claim that its mount point metadata is /baz + self.set_conf("client.badguy", "client_metadata", "root=/baz") + # Try to mount the client, see that it fails + with self.assert_cluster_log("client session with non-allowable root '/baz' denied"): + with self.assertRaises(CommandFailedError): + self.mount_b.mount_wait(cephfs_mntpt="/foo/bar") + + def test_session_evict_blocklisted(self): + """ + Check that mds evicts blocklisted client + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to use " + "mds_cluster.is_addr_blocklisted()") + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir {d0,d1} && touch {d0,d1}/file") + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([('/d0', 0), ('/d1', 1)], status=status) + + self.mount_a.run_shell(["touch", "d0/f0"]) + self.mount_a.run_shell(["touch", "d1/f0"]) + self.mount_b.run_shell(["touch", "d0/f1"]) + self.mount_b.run_shell(["touch", "d1/f1"]) + + self.assert_session_count(2, mds_id=self.fs.get_rank(rank=0, status=status)['name']) + self.assert_session_count(2, mds_id=self.fs.get_rank(rank=1, status=status)['name']) + + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id], + mds_id=self.fs.get_rank(rank=0, status=status)['name']) + self.wait_until_true(lambda: self.mds_cluster.is_addr_blocklisted( + 
self.mount_a.get_global_addr()), timeout=30) + + # 10 seconds should be enough for evicting client + time.sleep(10) + self.assert_session_count(1, mds_id=self.fs.get_rank(rank=0, status=status)['name']) + self.assert_session_count(1, mds_id=self.fs.get_rank(rank=1, status=status)['name']) + + self.mount_a.kill_cleanup() + self.mount_a.mount_wait() diff --git a/qa/tasks/cephfs/test_snap_schedules.py b/qa/tasks/cephfs/test_snap_schedules.py new file mode 100644 index 000000000..0264cac32 --- /dev/null +++ b/qa/tasks/cephfs/test_snap_schedules.py @@ -0,0 +1,607 @@ +import os +import json +import time +import errno +import logging + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from datetime import datetime, timedelta + +log = logging.getLogger(__name__) + +def extract_schedule_and_retention_spec(spec=[]): + schedule = set([s[0] for s in spec]) + retention = set([s[1] for s in spec]) + return (schedule, retention) + +def seconds_upto_next_schedule(time_from, timo): + ts = int(time_from) + return ((int(ts / 60) * 60) + timo) - ts + +class TestSnapSchedulesHelper(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + TEST_VOLUME_NAME = 'snap_vol' + TEST_DIRECTORY = 'snap_test_dir1' + + # this should be in sync with snap_schedule format + SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S' + + def check_scheduled_snapshot(self, exec_time, timo): + now = time.time() + delta = now - exec_time + log.debug(f'exec={exec_time}, now = {now}, timo = {timo}') + # tolerate snapshot existance in the range [-5,+5] + self.assertTrue((delta <= timo + 5) and (delta >= timo - 5)) + + def _fs_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + + def fs_snap_schedule_cmd(self, *args, **kwargs): + if 'fs' in kwargs: + fs = kwargs.pop('fs') + args += ('--fs', fs) + if 'format' in kwargs: + fmt = kwargs.pop('format') + args += ('--format', fmt) + for name, val in kwargs.items(): + args += (str(val),) + res = self._fs_cmd('snap-schedule', *args) + log.debug(f'res={res}') + return res + + def _create_or_reuse_test_volume(self): + result = json.loads(self._fs_cmd("volume", "ls")) + if len(result) == 0: + self.vol_created = True + self.volname = TestSnapSchedulesHelper.TEST_VOLUME_NAME + self._fs_cmd("volume", "create", self.volname) + else: + self.volname = result[0]['name'] + + def _enable_snap_schedule(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule") + + def _disable_snap_schedule(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule") + + def _allow_minute_granularity_snapshots(self): + self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True) + + def _dump_on_update(self): + self.config_set('mgr', 'mgr/snap_schedule/dump_on_update', True) + + def setUp(self): + super(TestSnapSchedulesHelper, self).setUp() + self.volname = None + self.vol_created = False + self._create_or_reuse_test_volume() + self.create_cbks = [] + self.remove_cbks = [] + # used to figure out which snapshots are created/deleted + self.snapshots = set() + self._enable_snap_schedule() + self._allow_minute_granularity_snapshots() + self._dump_on_update() + + def tearDown(self): + if self.vol_created: + self._delete_test_volume() + self._disable_snap_schedule() + super(TestSnapSchedulesHelper, self).tearDown() + + def _schedule_to_timeout(self, schedule): + mult = schedule[-1] + period = int(schedule[0:-1]) + if mult == 'M': + return period * 60 + elif mult 
== 'h': + return period * 60 * 60 + elif mult == 'd': + return period * 60 * 60 * 24 + elif mult == 'w': + return period * 60 * 60 * 24 * 7 + else: + raise RuntimeError('schedule multiplier not recognized') + + def add_snap_create_cbk(self, cbk): + self.create_cbks.append(cbk) + def remove_snap_create_cbk(self, cbk): + self.create_cbks.remove(cbk) + + def add_snap_remove_cbk(self, cbk): + self.remove_cbks.append(cbk) + def remove_snap_remove_cbk(self, cbk): + self.remove_cbks.remove(cbk) + + def assert_if_not_verified(self): + self.assertListEqual(self.create_cbks, []) + self.assertListEqual(self.remove_cbks, []) + + def verify(self, dir_path, max_trials): + trials = 0 + snap_path = f'{dir_path}/.snap' + while (len(self.create_cbks) or len(self.remove_cbks)) and trials < max_trials: + snapshots = set(self.mount_a.ls(path=snap_path)) + log.info(f'snapshots: {snapshots}') + added = snapshots - self.snapshots + log.info(f'added: {added}') + removed = self.snapshots - snapshots + log.info(f'removed: {removed}') + if added: + for cbk in list(self.create_cbks): + res = cbk(list(added)) + if res: + self.remove_snap_create_cbk(cbk) + break + if removed: + for cbk in list(self.remove_cbks): + res = cbk(list(removed)) + if res: + self.remove_snap_remove_cbk(cbk) + break + self.snapshots = snapshots + trials += 1 + time.sleep(1) + + def calc_wait_time_and_snap_name(self, snap_sched_exec_epoch, schedule): + timo = self._schedule_to_timeout(schedule) + # calculate wait time upto the next minute + wait_timo = seconds_upto_next_schedule(snap_sched_exec_epoch, timo) + + # expected "scheduled" snapshot name + ts_name = (datetime.utcfromtimestamp(snap_sched_exec_epoch) + + timedelta(seconds=wait_timo)).strftime(TestSnapSchedulesHelper.SNAPSHOT_TS_FORMAT) + return (wait_timo, ts_name) + + def verify_schedule(self, dir_path, schedules, retentions=[]): + log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}') + + result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json') + json_res = json.loads(result) + log.debug(f'json_res: {json_res}') + + for schedule in schedules: + self.assertTrue(schedule in json_res['schedule']) + for retention in retentions: + self.assertTrue(retention in json_res['retention']) + +class TestSnapSchedules(TestSnapSchedulesHelper): + def remove_snapshots(self, dir_path): + snap_path = f'{dir_path}/.snap' + + snapshots = self.mount_a.ls(path=snap_path) + for snapshot in snapshots: + snapshot_path = os.path.join(snap_path, snapshot) + log.debug(f'removing snapshot: {snapshot_path}') + self.mount_a.run_shell(['rmdir', snapshot_path]) + + def test_non_existent_snap_schedule_list(self): + """Test listing snap schedules on a non-existing filesystem path failure""" + try: + self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('incorrect errno when listing a non-existing snap schedule') + else: + raise RuntimeError('expected "fs snap-schedule list" to fail') + + def test_non_existent_schedule(self): + """Test listing non-existing snap schedules failure""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + try: + self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('incorrect errno when listing a non-existing snap schedule') + else: + raise RuntimeError('expected "fs snap-schedule list" returned fail') + + 
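Several of the tests that follow lean on the minute-boundary arithmetic of seconds_upto_next_schedule, defined near the top of this file. A short worked example may help when reading the wait-time calculations; the function is copied verbatim and the timestamp is arbitrary:

    def seconds_upto_next_schedule(time_from, timo):
        ts = int(time_from)
        return ((int(ts / 60) * 60) + timo) - ts

    # A '1M' schedule (timo == 60) armed 40 seconds into a minute fires about 20 seconds later.
    assert seconds_upto_next_schedule(1_000_000_000, 60) == 20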
self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_snap_schedule_list_post_schedule_remove(self): + """Test listing snap schedules post removal of a schedule""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1h') + + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + try: + self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('incorrect errno when listing a non-existing snap schedule') + else: + raise RuntimeError('"fs snap-schedule list" returned error') + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_snap_schedule(self): + """Test existence of a scheduled snapshot""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + exec_time = time.time() + + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...') + to_wait = timo + 2 # some leeway to avoid false failures... + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M']) + + def verify_added(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx[:16]: + self.check_scheduled_snapshot(exec_time, timo) + return True + return False + self.add_snap_create_cbk(verify_added) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait) + self.assert_if_not_verified() + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY) + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_multi_snap_schedule(self): + """Test exisitence of multiple scheduled snapshots""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + # set schedules on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M') + exec_time = time.time() + + timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...') + timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...') + to_wait = timo_2 + 2 # use max timeout + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M']) + + def verify_added_1(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx_1[:16]: + self.check_scheduled_snapshot(exec_time, timo_1) + return True + return False + def verify_added_2(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + 
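The snapname[10:26] comparison used in these verify_added callbacks checks only the YYYY-mm-dd-HH_MM portion of the snapshot timestamp: 'scheduled-' is 10 characters long, and the first 16 characters of the %Y-%m-%d-%H_%M_%S format stop just short of the seconds. A self-contained illustration with a made-up time:

    from datetime import datetime

    SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S'
    name = 'scheduled-' + datetime(2024, 1, 2, 3, 4, 5).strftime(SNAPSHOT_TS_FORMAT)

    assert name == 'scheduled-2024-01-02-03_04_05'
    assert name[10:26] == '2024-01-02-03_04'  # seconds deliberately excluded from the match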
snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx_2[:16]: + self.check_scheduled_snapshot(exec_time, timo_2) + return True + return False + self.add_snap_create_cbk(verify_added_1) + self.add_snap_create_cbk(verify_added_2) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait) + self.assert_if_not_verified() + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY) + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_snap_schedule_with_retention(self): + """Test scheduled snapshots along with rentention policy""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1M') + exec_time = time.time() + + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...') + to_wait = timo_1 + 2 # some leeway to avoid false failures... + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) + + def verify_added(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx[:16]: + self.check_scheduled_snapshot(exec_time, timo_1) + return True + return False + self.add_snap_create_cbk(verify_added) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait) + self.assert_if_not_verified() + + timo_2 = timo_1 + 60 # expected snapshot removal timeout + def verify_removed(snaps_removed): + log.debug(f'snapshots removed={snaps_removed}') + self.assertEqual(len(snaps_removed), 1) + snapname = snaps_removed[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx[:16]: + self.check_scheduled_snapshot(exec_time, timo_2) + return True + return False + log.debug(f'expecting removal of snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_2}s...') + to_wait = timo_2 + self.add_snap_remove_cbk(verify_removed) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait+2) + self.assert_if_not_verified() + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY) + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def get_snap_stats(self, dir_path): + snap_path = f"{dir_path}/.snap"[1:] + snapshots = self.mount_a.ls(path=snap_path) + fs_count = len(snapshots) + log.debug(f'snapshots: {snapshots}') + + result = self.fs_snap_schedule_cmd('status', path=dir_path, + format='json') + json_res = json.loads(result)[0] + db_count = int(json_res['created_count']) + log.debug(f'json_res: {json_res}') + + snap_stats = dict() + snap_stats['fs_count'] = fs_count + snap_stats['db_count'] = db_count + + log.debug(f'fs_count: {fs_count}') + log.debug(f'db_count: {db_count}') + + return snap_stats + + def verify_snap_stats(self, dir_path): + snap_stats = self.get_snap_stats(dir_path) + self.assertTrue(snap_stats['fs_count'] == 
snap_stats['db_count']) + + def test_concurrent_snap_creates(self): + """Test concurrent snap creates in same file-system without db issues""" + """ + Test snap creates at same cadence on same fs to verify correct stats. + A single SQLite DB Connection handle cannot be used to run concurrent + transactions and results transaction aborts. This test makes sure that + proper care has been taken in the code to avoid such situation by + verifying number of dirs created on the file system with the + created_count in the schedule_meta table for the specific path. + """ + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + testdirs = [] + for d in range(10): + testdirs.append(os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "dir" + str(d))) + + for d in testdirs: + self.mount_a.run_shell(['mkdir', '-p', d[1:]]) + self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M') + + exec_time = time.time() + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + + for d in testdirs: + self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M') + + # we wait for 10 snaps to be taken + wait_time = timo_1 + 10 * 60 + 15 + time.sleep(wait_time) + + for d in testdirs: + self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M') + + for d in testdirs: + self.verify_snap_stats(d) + + for d in testdirs: + self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M') + self.remove_snapshots(d[1:]) + self.mount_a.run_shell(['rmdir', d[1:]]) + + def test_snap_schedule_with_mgr_restart(self): + """Test that snap schedule is resumed after mgr restart""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + testdir = os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "test_restart") + self.mount_a.run_shell(['mkdir', '-p', testdir[1:]]) + self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1M') + + exec_time = time.time() + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + + self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1M') + + # we wait for 10 snaps to be taken + wait_time = timo_1 + 10 * 60 + 15 + time.sleep(wait_time) + + old_stats = self.get_snap_stats(testdir) + self.assertTrue(old_stats['fs_count'] == old_stats['db_count']) + self.assertTrue(old_stats['fs_count'] > 9) + + # restart mgr + active_mgr = self.mgr_cluster.mon_manager.get_mgr_dump()['active_name'] + log.debug(f'restarting active mgr: {active_mgr}') + self.mgr_cluster.mon_manager.revive_mgr(active_mgr) + time.sleep(300) # sleep for 5 minutes + self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1M') + + new_stats = self.get_snap_stats(testdir) + self.assertTrue(new_stats['fs_count'] == new_stats['db_count']) + self.assertTrue(new_stats['fs_count'] > old_stats['fs_count']) + self.assertTrue(new_stats['db_count'] > old_stats['db_count']) + + # cleanup + self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1M') + self.remove_snapshots(testdir[1:]) + self.mount_a.run_shell(['rmdir', testdir[1:]]) + + def test_schedule_auto_deactivation_for_non_existent_path(self): + """ + Test that a non-existent path leads to schedule deactivation after a few retries. 
+ """ + self.fs_snap_schedule_cmd('add', path="/bad-path", snap_schedule='1M') + start_time = time.time() + + while time.time() - start_time < 60.0: + s = self.fs_snap_schedule_cmd('status', path="/bad-path", format='json') + json_status = json.loads(s)[0] + + self.assertTrue(int(json_status['active']) == 1) + time.sleep(60) + + s = self.fs_snap_schedule_cmd('status', path="/bad-path", format='json') + json_status = json.loads(s)[0] + self.assertTrue(int(json_status['active']) == 0) + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path="/bad-path") + + def test_snap_schedule_for_number_of_snaps_retention(self): + """ + Test that number of snaps retained are as per user spec. + """ + total_snaps = 55 + test_dir = '/' + TestSnapSchedules.TEST_DIRECTORY + + self.mount_a.run_shell(['mkdir', '-p', test_dir[1:]]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M') + self.fs_snap_schedule_cmd('retention', 'add', path=test_dir, + retention_spec_or_period=f'{total_snaps}n') + exec_time = time.time() + + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + + # verify snapshot schedule + self.verify_schedule(test_dir, ['1M']) + + # we wait for total_snaps snaps to be taken + wait_time = timo_1 + total_snaps * 60 + 15 + time.sleep(wait_time) + + snap_stats = self.get_snap_stats(test_dir) + self.assertTrue(snap_stats['fs_count'] == total_snaps) + self.assertTrue(snap_stats['db_count'] >= total_snaps) + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=test_dir) + + # remove all scheduled snapshots + self.remove_snapshots(test_dir[1:]) + + self.mount_a.run_shell(['rmdir', test_dir[1:]]) + + +class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper): + def remove_snapshots(self, dir_path, sdn): + snap_path = f'{dir_path}/{sdn}' + + snapshots = self.mount_a.ls(path=snap_path) + for snapshot in snapshots: + snapshot_path = os.path.join(snap_path, snapshot) + log.debug(f'removing snapshot: {snapshot_path}') + self.mount_a.run_shell(['rmdir', snapshot_path]) + + def get_snap_dir_name(self): + from tasks.cephfs.fuse_mount import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + + if isinstance(self.mount_a, KernelMount): + sdn = self.mount_a.client_config.get('snapdirname', '.snap') + elif isinstance(self.mount_a, FuseMount): + sdn = self.mount_a.client_config.get('client_snapdir', '.snap') + self.fs.set_ceph_conf('client', 'client snapdir', sdn) + self.mount_a.remount() + return sdn + + def test_snap_dir_name(self): + """Test the correctness of snap directory name""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedulesSnapdir.TEST_DIRECTORY]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1M') + exec_time = time.time() + + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + sdn = self.get_snap_dir_name() + log.info(f'expecting snap {TestSnapSchedulesSnapdir.TEST_DIRECTORY}/{sdn}/scheduled-{snap_sfx} in ~{timo}s...') + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedulesSnapdir.TEST_DIRECTORY, sdn) + + 
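The snapdir handling above only changes which directory name the scheduled snapshots appear under; the scheduled-<timestamp> naming itself is unchanged. A small sketch of how the expected path could be assembled for a non-default snapdir (the function name and the values are hypothetical):

    import os

    def scheduled_snap_path(dir_path, ts_name, snap_dir_name='.snap'):
        # snap_dir_name comes from snapdirname (kernel) or client_snapdir (fuse)
        return os.path.join(dir_path, snap_dir_name, 'scheduled-' + ts_name)

    print(scheduled_snap_path('snap_test_dir1', '2024-01-02-03_04_05', '.snapshot'))
    # snap_test_dir1/.snapshot/scheduled-2024-01-02-03_04_05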
self.mount_a.run_shell(['rmdir', TestSnapSchedulesSnapdir.TEST_DIRECTORY]) + + +""" +Note that the class TestSnapSchedulesMandatoryFSArgument tests snap-schedule +commands only for multi-fs scenario. Commands for a single default fs should +pass for tests defined above or elsewhere. +""" + + +class TestSnapSchedulesMandatoryFSArgument(TestSnapSchedulesHelper): + REQUIRE_BACKUP_FILESYSTEM = True + TEST_DIRECTORY = 'mandatory_fs_argument_test_dir' + + def test_snap_schedule_without_fs_argument(self): + """Test command fails without --fs argument in presence of multiple fs""" + test_path = TestSnapSchedulesMandatoryFSArgument.TEST_DIRECTORY + self.mount_a.run_shell(['mkdir', '-p', test_path]) + + # try setting a schedule on the dir; this should fail now that we are + # working with mutliple fs; we need the --fs argument if there are more + # than one fs hosted by the same cluster + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', test_path, snap_schedule='1M') + + self.mount_a.run_shell(['rmdir', test_path]) + + def test_snap_schedule_for_non_default_fs(self): + """Test command succes with --fs argument for non-default fs""" + test_path = TestSnapSchedulesMandatoryFSArgument.TEST_DIRECTORY + self.mount_a.run_shell(['mkdir', '-p', test_path]) + + # use the backup fs as the second fs; all these commands must pass + self.fs_snap_schedule_cmd('add', test_path, snap_schedule='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('activate', test_path, snap_schedule='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('retention', 'add', test_path, retention_spec_or_period='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('list', test_path, fs='backup_fs', format='json') + self.fs_snap_schedule_cmd('status', test_path, fs='backup_fs', format='json') + self.fs_snap_schedule_cmd('retention', 'remove', test_path, retention_spec_or_period='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('deactivate', test_path, snap_schedule='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('remove', test_path, snap_schedule='1M', fs='backup_fs') + + self.mount_a.run_shell(['rmdir', test_path]) diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py new file mode 100644 index 000000000..608dcc81f --- /dev/null +++ b/qa/tasks/cephfs/test_snapshots.py @@ -0,0 +1,605 @@ +import errno +import logging +import signal +from textwrap import dedent +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra.run import Raw +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +MDS_RESTART_GRACE = 60 + +class TestSnapshots(CephFSTestCase): + MDSS_REQUIRED = 3 + LOAD_SETTINGS = ["mds_max_snaps_per_dir"] + + def _check_subtree(self, rank, path, status=None): + got_subtrees = self.fs.rank_asok(["get", "subtrees"], rank=rank, status=status) + for s in got_subtrees: + if s['dir']['path'] == path and s['auth_first'] == rank: + return True + return False + + def _get_snapclient_dump(self, rank=0, status=None): + return self.fs.rank_asok(["dump", "snaps"], rank=rank, status=status) + + def _get_snapserver_dump(self, rank=0, status=None): + return self.fs.rank_asok(["dump", "snaps", "--server"], rank=rank, status=status) + + def _get_last_created_snap(self, rank=0, status=None): + return int(self._get_snapserver_dump(rank,status=status)["last_created"]) + + def _get_last_destroyed_snap(self, rank=0, status=None): + return 
int(self._get_snapserver_dump(rank,status=status)["last_destroyed"]) + + def _get_pending_snap_update(self, rank=0, status=None): + return self._get_snapserver_dump(rank,status=status)["pending_update"] + + def _get_pending_snap_destroy(self, rank=0, status=None): + return self._get_snapserver_dump(rank,status=status)["pending_destroy"] + + def test_allow_new_snaps_config(self): + """ + Check whether 'allow_new_snaps' setting works + """ + self.mount_a.run_shell(["mkdir", "test-allow-snaps"]) + + self.fs.set_allow_new_snaps(False); + try: + self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, "expected EPERM") + else: + self.fail("expected snap creatiion to fail") + + self.fs.set_allow_new_snaps(True); + self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"]) + self.mount_a.run_shell(["rmdir", "test-allow-snaps/.snap/snap00"]) + self.mount_a.run_shell(["rmdir", "test-allow-snaps"]) + + def test_kill_mdstable(self): + """ + check snaptable transcation + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to forcibly kill mount") + + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + # setup subtrees + self.mount_a.run_shell(["mkdir", "-p", "d1/dir"]) + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d1", 1)], rank=1, path="/d1") + + last_created = self._get_last_created_snap(rank=0,status=status) + + # mds_kill_mdstable_at: + # 1: MDSTableServer::handle_prepare + # 2: MDSTableServer::_prepare_logged + # 5: MDSTableServer::handle_commit + # 6: MDSTableServer::_commit_logged + for i in [1,2,5,6]: + log.info("testing snapserver mds_kill_mdstable_at={0}".format(i)) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + self.fs.rank_freeze(True, rank=0) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s1{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank0['name']); + + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + status = self.fs.wait_for_daemons() + + proc.wait() + last_created += 1 + self.wait_until_true(lambda: self._get_last_created_snap(rank=0) == last_created, timeout=30) + + self.set_conf("mds", "mds_reconnect_timeout", "5") + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # set mds_kill_mdstable_at, also kill snapclient + for i in [2,5,6]: + log.info("testing snapserver mds_kill_mdstable_at={0}, also kill snapclient".format(i)) + status = self.fs.status() + last_created = self._get_last_created_snap(rank=0, status=status) + + rank0 = self.fs.get_rank(rank=0, status=status) + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=0) # prevent failover... + self.fs.rank_freeze(True, rank=1) # prevent failover... 
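For reference while reading the kill loops in this test, the mds_kill_mdstable_at values map to the code points called out in the inline comments here and further below. Collecting them in one place, purely as a reading aid mirroring those comments (not an authoritative list of the failpoint implementation):

    MDS_KILL_MDSTABLE_AT = {
        1: 'MDSTableServer::handle_prepare',
        2: 'MDSTableServer::_prepare_logged',
        3: 'MDSTableClient::handle_request (got agree)',
        4: 'MDSTableClient::commit',
        5: 'MDSTableServer::handle_commit',
        6: 'MDSTableServer::_commit_logged',
        7: 'MDSTableClient::handle_request (got ack)',
        8: 'MDSTableServer::handle_rollback',
        9: 'MDSTableClient::handle_notify_prep',  # used later in test_snapclient_cache
    }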
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s2{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank0['name']); + + self.fs.rank_signal(signal.SIGKILL, rank=1) + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + + self.fs.wait_for_state('up:resolve', rank=0, timeout=MDS_RESTART_GRACE) + if i in [2,5]: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + elif i == 6: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + + if i in [2,5]: + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + if i == 2: + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + else: + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # mds_kill_mdstable_at: + # 3: MDSTableClient::handle_request (got agree) + # 4: MDSTableClient::commit + # 7: MDSTableClient::handle_request (got ack) + for i in [3,4,7]: + log.info("testing snapclient mds_kill_mdstable_at={0}".format(i)) + last_created = self._get_last_created_snap(rank=0) + + status = self.fs.status() + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=1) # prevent failover... 
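The expected end state after the failed rank is restarted differs per snapclient failpoint; a compact summary of what the assertions below check (derived from those assertions, shown only as a reading aid):

    # Whether a new snapshot ultimately exists once recovery completes.
    snapshot_created_after_recovery = {
        3: False,  # killed on 'agree': the prepared update is rolled back
        4: True,   # killed in commit: the pending update is replayed to completion
        7: True,   # killed on 'ack': the creation was already durable
    }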
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=1, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s3{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank1['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + if i in [3,4]: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + elif i == 7: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + if i in [3,4]: + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + if i == 3: + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + else: + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # mds_kill_mdstable_at: + # 3: MDSTableClient::handle_request (got agree) + # 8: MDSTableServer::handle_rollback + log.info("testing snapclient mds_kill_mdstable_at=3, snapserver mds_kill_mdstable_at=8") + last_created = self._get_last_created_snap(rank=0) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=0) + self.fs.rank_freeze(True, rank=1) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "8"], rank=0, status=status) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "3"], rank=1, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s4"], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank1['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + + # rollback triggers assertion + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank0['name']); + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + + # mds.1 should re-send rollback message + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + def test_snapclient_cache(self): + """ + check if snapclient cache gets synced properly + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(3) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/dir"]) + self.mount_a.run_shell(["mkdir", "-p", "d0/d2/dir"]) + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1") + self.mount_a.setfattr("d0/d2", "ceph.dir.pin", "2") + self._wait_subtrees([("/d0", 0), ("/d0/d1", 1), ("/d0/d2", 2)], rank="all", status=status, path="/d0") + + def _check_snapclient_cache(snaps_dump, cache_dump=None, rank=0): + if cache_dump is None: + 
cache_dump = self._get_snapclient_dump(rank=rank) + for key, value in cache_dump.items(): + if value != snaps_dump[key]: + return False + return True; + + # sync after mksnap + last_created = self._get_last_created_snap(rank=0) + self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s1", "d0/d1/dir/.snap/s2"]) + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # sync after rmsnap + last_destroyed = self._get_last_destroyed_snap(rank=0) + self.mount_a.run_shell(["rmdir", "d0/d1/dir/.snap/s1"]) + self.wait_until_true(lambda: len(self._get_pending_snap_destroy(rank=0)) == 0, timeout=30) + self.assertGreater(self._get_last_destroyed_snap(rank=0), last_destroyed) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # sync during mds recovers + self.fs.rank_fail(rank=2) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + self.fs.rank_fail(rank=0) + self.fs.rank_fail(rank=1) + status = self.fs.wait_for_daemons() + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # kill at MDSTableClient::handle_notify_prep + status = self.fs.status() + rank2 = self.fs.get_rank(rank=2, status=status) + self.fs.rank_freeze(True, rank=2) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "9"], rank=2, status=status) + proc = self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s3"], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank2['name']); + + # mksnap should wait for notify ack from mds.2 + self.assertFalse(proc.finished); + + # mksnap should proceed after mds.2 fails + self.fs.rank_fail(rank=2) + self.wait_until_true(lambda: proc.finished, timeout=30); + + self.fs.mds_restart(rank2['name']) + self.wait_for_daemon_start([rank2['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + self.mount_a.run_shell(["rmdir", Raw("d0/d1/dir/.snap/*")]) + + # kill at MDSTableClient::commit + # the recovering mds should sync all mds' cache when it enters resolve stage + self.set_conf("mds", "mds_reconnect_timeout", "5") + for i in range(1, 4): + status = self.fs.status() + rank2 = self.fs.get_rank(rank=2, status=status) + self.fs.rank_freeze(True, rank=2) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "4"], rank=2, status=status) + last_created = self._get_last_created_snap(rank=0) + proc = self.mount_a.run_shell(["mkdir", "d0/d2/dir/.snap/s{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank2['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + + if i in 
[2,4]: + self.fs.rank_fail(rank=0) + if i in [3,4]: + self.fs.rank_fail(rank=1) + + self.fs.rank_fail(rank=2) + self.fs.mds_restart(rank2['name']) + self.wait_for_daemon_start([rank2['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + rank0_cache = self._get_snapclient_dump(rank=0) + rank1_cache = self._get_snapclient_dump(rank=1) + rank2_cache = self._get_snapclient_dump(rank=2) + + self.assertGreater(int(rank0_cache["last_created"]), last_created) + self.assertEqual(rank0_cache, rank1_cache); + self.assertEqual(rank0_cache, rank2_cache); + + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertEqual(snaps_dump["last_created"], rank0_cache["last_created"]) + self.assertTrue(_check_snapclient_cache(snaps_dump, cache_dump=rank0_cache)); + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d0/d2/dir/.snap/*")]) + + def test_multimds_mksnap(self): + """ + check if snapshot takes effect across authority subtrees + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/empty"]) + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d0", 0), ("/d0/d1", 1)], rank="all", status=status, path="/d0") + + self.mount_a.write_test_pattern("d0/d1/file_a", 8 * 1024 * 1024) + self.mount_a.run_shell(["mkdir", "d0/.snap/s1"]) + self.mount_a.run_shell(["rm", "-f", "d0/d1/file_a"]) + self.mount_a.validate_test_pattern("d0/.snap/s1/d1/file_a", 8 * 1024 * 1024) + + self.mount_a.run_shell(["rmdir", "d0/.snap/s1"]) + self.mount_a.run_shell(["rm", "-rf", "d0"]) + + def test_multimds_past_parents(self): + """ + check if past parents are properly recorded during across authority rename + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p {d0,d1}/empty") + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d0", 0), ("/d1", 1)], rank=0, status=status) + + self.mount_a.run_shell(["mkdir", "d0/d3"]) + self.mount_a.run_shell(["mkdir", "d0/.snap/s1"]) + snap_name = self.mount_a.run_shell(["ls", "d0/d3/.snap"]).stdout.getvalue() + + self.mount_a.run_shell(["mv", "d0/d3", "d1/d3"]) + snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue() + self.assertEqual(snap_name1, snap_name); + + self.mount_a.run_shell(["rmdir", "d0/.snap/s1"]) + snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue() + self.assertEqual(snap_name1, ""); + + self.mount_a.run_shell(["rm", "-rf", "d0", "d1"]) + + def test_multimds_hardlink(self): + """ + check if hardlink snapshot works in multimds setup + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p {d0,d1}/empty") + + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d0", 0), ("/d1", 1)], rank=0, status=status) + + self.mount_a.run_python(dedent(""" + import os + open(os.path.join("{path}", "d0/file1"), 'w').write("asdf") + open(os.path.join("{path}", "d0/file2"), 'w').write("asdf") + """.format(path=self.mount_a.mountpoint) + )) + + self.mount_a.run_shell(["ln", "d0/file1", "d1/file1"]) + 
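The hard-link assertions in these multi-MDS tests rest on the invariant that both paths resolve to the same inode, which is what the path_to_ino equality checks elsewhere in this file express. The same invariant can be seen locally with plain os.link, independent of CephFS:

    import os
    import tempfile

    with tempfile.TemporaryDirectory() as d:
        a = os.path.join(d, 'file1')
        b = os.path.join(d, 'link1')
        with open(a, 'w') as f:
            f.write('asdf')
        os.link(a, b)  # hard link, analogous to `ln d0/file1 d1/file1`
        assert os.stat(a).st_ino == os.stat(b).st_ino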
self.mount_a.run_shell(["ln", "d0/file2", "d1/file2"]) + + self.mount_a.run_shell(["mkdir", "d1/.snap/s1"]) + + self.mount_a.run_python(dedent(""" + import os + open(os.path.join("{path}", "d0/file1"), 'w').write("qwer") + """.format(path=self.mount_a.mountpoint) + )) + + self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file1"]) + + self.mount_a.run_shell(["rm", "-f", "d0/file2"]) + self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"]) + + self.mount_a.run_shell(["rm", "-f", "d1/file2"]) + self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"]) + + self.mount_a.run_shell(["rmdir", "d1/.snap/s1"]) + self.mount_a.run_shell(["rm", "-rf", "d0", "d1"]) + + class SnapLimitViolationException(Exception): + failed_snapshot_number = -1 + + def __init__(self, num): + self.failed_snapshot_number = num + + def get_snap_name(self, dir_name, sno): + sname = "{dir_name}/.snap/s_{sno}".format(dir_name=dir_name, sno=sno) + return sname + + def create_snap_dir(self, sname): + self.mount_a.run_shell(["mkdir", sname]) + + def delete_dir_and_snaps(self, dir_name, snaps): + for sno in range(1, snaps+1, 1): + sname = self.get_snap_name(dir_name, sno) + self.mount_a.run_shell(["rmdir", sname]) + self.mount_a.run_shell(["rmdir", dir_name]) + + def create_dir_and_snaps(self, dir_name, snaps): + self.mount_a.run_shell(["mkdir", dir_name]) + + for sno in range(1, snaps+1, 1): + sname = self.get_snap_name(dir_name, sno) + try: + self.create_snap_dir(sname) + except CommandFailedError as e: + # failing at the last mkdir beyond the limit is expected + if sno == snaps: + log.info("failed while creating snap #{}: {}".format(sno, repr(e))) + raise TestSnapshots.SnapLimitViolationException(sno) + + def test_mds_max_snaps_per_dir_default_limit(self): + """ + Test the newly introudced option named mds_max_snaps_per_dir + Default snaps limit is 100 + Test if the default number of snapshot directories can be created + """ + self.create_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir)) + self.delete_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir)) + + def test_mds_max_snaps_per_dir_with_increased_limit(self): + """ + Test the newly introudced option named mds_max_snaps_per_dir + First create 101 directories and ensure that the 101st directory + creation fails. Then increase the default by one and see if the + additional directory creation succeeds + """ + # first test the default limit + new_limit = int(self.mds_max_snaps_per_dir) + self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)]) + try: + self.create_dir_and_snaps("accounts", new_limit + 1) + except TestSnapshots.SnapLimitViolationException as e: + if e.failed_snapshot_number == (new_limit + 1): + pass + # then increase the limit by one and test + new_limit = new_limit + 1 + self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)]) + sname = self.get_snap_name("accounts", new_limit) + self.create_snap_dir(sname) + self.delete_dir_and_snaps("accounts", new_limit) + + def test_mds_max_snaps_per_dir_with_reduced_limit(self): + """ + Test the newly introudced option named mds_max_snaps_per_dir + First create 99 directories. Then reduce the limit to 98. Then try + creating another directory and ensure that additional directory + creation fails. 
+ """ + # first test the new limit + new_limit = int(self.mds_max_snaps_per_dir) - 1 + self.create_dir_and_snaps("accounts", new_limit) + sname = self.get_snap_name("accounts", new_limit + 1) + # then reduce the limit by one and test + new_limit = new_limit - 1 + self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)]) + try: + self.create_snap_dir(sname) + except CommandFailedError: + # after reducing limit we expect the new snapshot creation to fail + pass + self.delete_dir_and_snaps("accounts", new_limit + 1) + + +class TestMonSnapsAndFsPools(CephFSTestCase): + MDSS_REQUIRED = 3 + + def test_disallow_monitor_managed_snaps_for_fs_pools(self): + """ + Test that creation of monitor managed snaps fails for pools attached + to any file-system + """ + with self.assertRaises(CommandFailedError): + self.fs.rados(["mksnap", "snap1"], pool=self.fs.get_data_pool_name()) + + with self.assertRaises(CommandFailedError): + self.fs.rados(["mksnap", "snap2"], pool=self.fs.get_metadata_pool_name()) + + with self.assertRaises(CommandFailedError): + test_pool_name = self.fs.get_data_pool_name() + base_cmd = f'osd pool mksnap {test_pool_name} snap3' + self.run_cluster_cmd(base_cmd) + + with self.assertRaises(CommandFailedError): + test_pool_name = self.fs.get_metadata_pool_name() + base_cmd = f'osd pool mksnap {test_pool_name} snap4' + self.run_cluster_cmd(base_cmd) + + def test_attaching_pools_with_snaps_to_fs_fails(self): + """ + Test that attempt to attach pool with snapshots to an fs fails + """ + test_pool_name = 'snap-test-pool' + base_cmd = f'osd pool create {test_pool_name}' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, 0) + + self.fs.rados(["mksnap", "snap3"], pool=test_pool_name) + + base_cmd = f'fs add_data_pool {self.fs.name} {test_pool_name}' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, errno.EOPNOTSUPP) + + # cleanup + self.fs.rados(["rmsnap", "snap3"], pool=test_pool_name) + base_cmd = f'osd pool delete {test_pool_name}' + ret = self.run_cluster_cmd_result(base_cmd) + + def test_using_pool_with_snap_fails_fs_creation(self): + """ + Test that using a pool with snaps for fs creation fails + """ + base_cmd = 'osd pool create test_data_pool' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, 0) + base_cmd = 'osd pool create test_metadata_pool' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, 0) + + self.fs.rados(["mksnap", "snap4"], pool='test_data_pool') + + base_cmd = 'fs new testfs test_metadata_pool test_data_pool' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, errno.EOPNOTSUPP) + + # cleanup + self.fs.rados(["rmsnap", "snap4"], pool='test_data_pool') + base_cmd = 'osd pool delete test_data_pool' + ret = self.run_cluster_cmd_result(base_cmd) + base_cmd = 'osd pool delete test_metadata_pool' + ret = self.run_cluster_cmd_result(base_cmd) diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py new file mode 100644 index 000000000..8bdc126e2 --- /dev/null +++ b/qa/tasks/cephfs/test_strays.py @@ -0,0 +1,1027 @@ +import json +import time +import logging +from textwrap import dedent +import datetime +import gevent + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import Raw +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +log = logging.getLogger(__name__) + + +class TestStrays(CephFSTestCase): + MDSS_REQUIRED = 2 + + OPS_THROTTLE = 1 + FILES_THROTTLE = 2 + + # Range of different file 
sizes used in throttle test's workload + throttle_workload_size_range = 16 + + @for_teuthology + def test_ops_throttle(self): + self._test_throttling(self.OPS_THROTTLE) + + @for_teuthology + def test_files_throttle(self): + self._test_throttling(self.FILES_THROTTLE) + + def test_dir_deletion(self): + """ + That when deleting a bunch of dentries and the containing + directory, everything gets purged. + Catches cases where the client might e.g. fail to trim + the unlinked dir from its cache. + """ + file_count = 1000 + create_script = dedent(""" + import os + + mountpoint = "{mountpoint}" + subdir = "delete_me" + size = {size} + file_count = {file_count} + os.mkdir(os.path.join(mountpoint, subdir)) + for i in range(0, file_count): + filename = "{{0}}_{{1}}.bin".format(i, size) + with open(os.path.join(mountpoint, subdir, filename), 'w') as f: + f.write(size * 'x') + """.format( + mountpoint=self.mount_a.mountpoint, + size=1024, + file_count=file_count + )) + + self.mount_a.run_python(create_script) + + # That the dirfrag object is created + self.fs.mds_asok(["flush", "journal"]) + dir_ino = self.mount_a.path_to_ino("delete_me") + self.assertTrue(self.fs.dirfrag_exists(dir_ino, 0)) + + # Remove everything + self.mount_a.run_shell(["rm", "-rf", "delete_me"]) + self.fs.mds_asok(["flush", "journal"]) + + # That all the removed files get created as strays + strays = self.get_mdc_stat("strays_created") + self.assertEqual(strays, file_count + 1) + + # That the strays all get enqueued for purge + self.wait_until_equal( + lambda: self.get_mdc_stat("strays_enqueued"), + strays, + timeout=600 + + ) + + # That all the purge operations execute + self.wait_until_equal( + lambda: self.get_stat("purge_queue", "pq_executed"), + strays, + timeout=600 + ) + + # That finally, the directory metadata object is gone + self.assertFalse(self.fs.dirfrag_exists(dir_ino, 0)) + + # That finally, the data objects are all gone + self.await_data_pool_empty() + + def _test_throttling(self, throttle_type): + self.data_log = [] + try: + return self._do_test_throttling(throttle_type) + except: + for l in self.data_log: + log.info(",".join([l_.__str__() for l_ in l])) + raise + + def _do_test_throttling(self, throttle_type): + """ + That the mds_max_purge_ops setting is respected + """ + + def set_throttles(files, ops): + """ + Helper for updating ops/files limits, and calculating effective + ops_per_pg setting to give the same ops limit. + """ + self.set_conf('mds', 'mds_max_purge_files', "%d" % files) + self.set_conf('mds', 'mds_max_purge_ops', "%d" % ops) + + pgs = self.fs.mon_manager.get_pool_int_property( + self.fs.get_data_pool_name(), + "pg_num" + ) + ops_per_pg = float(ops) / pgs + self.set_conf('mds', 'mds_max_purge_ops_per_pg', "%s" % ops_per_pg) + + # Test conditions depend on what we're going to be exercising. + # * Lift the threshold on whatever throttle we are *not* testing, so + # that the throttle of interest is the one that will be the bottleneck + # * Create either many small files (test file count throttling) or fewer + # large files (test op throttling) + if throttle_type == self.OPS_THROTTLE: + set_throttles(files=100000000, ops=16) + size_unit = 1024 * 1024 # big files, generate lots of ops + file_multiplier = 100 + elif throttle_type == self.FILES_THROTTLE: + # The default value of file limit is pretty permissive, so to avoid + # the test running too fast, create lots of files and set the limit + # pretty low. 
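The per-PG limit computed by set_throttles above simply spreads the flat ops ceiling across the data pool's placement groups, so that the effective limit under test stays the same. A worked example with hypothetical numbers:

    def mds_max_purge_ops_per_pg(mds_max_purge_ops, pg_num):
        # chosen so that pg_num * ops_per_pg reproduces the intended ops ceiling
        return float(mds_max_purge_ops) / pg_num

    assert mds_max_purge_ops_per_pg(16, 8) == 2.0  # 16 purge ops spread over 8 PGs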
+ set_throttles(ops=100000000, files=6) + size_unit = 1024 # small, numerous files + file_multiplier = 200 + else: + raise NotImplementedError(throttle_type) + + # Pick up config changes + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + create_script = dedent(""" + import os + + mountpoint = "{mountpoint}" + subdir = "delete_me" + size_unit = {size_unit} + file_multiplier = {file_multiplier} + os.mkdir(os.path.join(mountpoint, subdir)) + for i in range(0, file_multiplier): + for size in range(0, {size_range}*size_unit, size_unit): + filename = "{{0}}_{{1}}.bin".format(i, size // size_unit) + with open(os.path.join(mountpoint, subdir, filename), 'w') as f: + f.write(size * 'x') + """.format( + mountpoint=self.mount_a.mountpoint, + size_unit=size_unit, + file_multiplier=file_multiplier, + size_range=self.throttle_workload_size_range + )) + + self.mount_a.run_python(create_script) + + # We will run the deletion in the background, to reduce the risk of it completing before + # we have started monitoring the stray statistics. + def background(): + self.mount_a.run_shell(["rm", "-rf", "delete_me"]) + self.fs.mds_asok(["flush", "journal"]) + + background_thread = gevent.spawn(background) + + total_inodes = file_multiplier * self.throttle_workload_size_range + 1 + mds_max_purge_ops = int(self.fs.get_config("mds_max_purge_ops", 'mds')) + mds_max_purge_files = int(self.fs.get_config("mds_max_purge_files", 'mds')) + + # During this phase we look for the concurrent ops to exceed half + # the limit (a heuristic) and not exceed the limit (a correctness + # condition). + purge_timeout = 600 + elapsed = 0 + files_high_water = 0 + ops_high_water = 0 + + while True: + stats = self.fs.mds_asok(['perf', 'dump']) + mdc_stats = stats['mds_cache'] + pq_stats = stats['purge_queue'] + if elapsed >= purge_timeout: + raise RuntimeError("Timeout waiting for {0} inodes to purge, stats:{1}".format(total_inodes, mdc_stats)) + + num_strays = mdc_stats['num_strays'] + num_strays_purging = pq_stats['pq_executing'] + num_purge_ops = pq_stats['pq_executing_ops'] + files_high_water = pq_stats['pq_executing_high_water'] + ops_high_water = pq_stats['pq_executing_ops_high_water'] + + self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops, files_high_water, ops_high_water]) + + total_strays_created = mdc_stats['strays_created'] + total_strays_purged = pq_stats['pq_executed'] + + if total_strays_purged == total_inodes: + log.info("Complete purge in {0} seconds".format(elapsed)) + break + elif total_strays_purged > total_inodes: + raise RuntimeError("Saw more strays than expected, mdc stats: {0}".format(mdc_stats)) + else: + if throttle_type == self.OPS_THROTTLE: + # 11 is filer_max_purge_ops plus one for the backtrace: + # limit is allowed to be overshot by this much. + if num_purge_ops > mds_max_purge_ops + 11: + raise RuntimeError("num_purge_ops violates threshold {0}/{1}".format( + num_purge_ops, mds_max_purge_ops + )) + elif throttle_type == self.FILES_THROTTLE: + if num_strays_purging > mds_max_purge_files: + raise RuntimeError("num_strays_purging violates threshold {0}/{1}".format( + num_strays_purging, mds_max_purge_files + )) + else: + raise NotImplementedError(throttle_type) + + log.info("Waiting for purge to complete {0}/{1}, {2}/{3}".format( + num_strays_purging, num_strays, + total_strays_purged, total_strays_created + )) + time.sleep(1) + elapsed += 1 + + background_thread.join() + + # Check that we got up to a respectable rate during the purge. 
This is totally + # racy, but should be safeish unless the cluster is pathologically slow, or + # insanely fast such that the deletions all pass before we have polled the + # statistics. + if throttle_type == self.OPS_THROTTLE: + if ops_high_water < mds_max_purge_ops // 2: + raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format( + ops_high_water, mds_max_purge_ops + )) + # The MDS may go over mds_max_purge_ops for some items, like a + # heavily fragmented directory. The throttle does not kick in + # until *after* we reach or exceed the limit. This is expected + # because we don't want to starve the PQ or never purge a + # particularly large file/directory. + self.assertLessEqual(ops_high_water, mds_max_purge_ops+64) + elif throttle_type == self.FILES_THROTTLE: + if files_high_water < mds_max_purge_files // 2: + raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format( + files_high_water, mds_max_purge_files + )) + self.assertLessEqual(files_high_water, mds_max_purge_files) + + # Sanity check all MDC stray stats + stats = self.fs.mds_asok(['perf', 'dump']) + mdc_stats = stats['mds_cache'] + pq_stats = stats['purge_queue'] + self.assertEqual(mdc_stats['num_strays'], 0) + self.assertEqual(mdc_stats['num_strays_delayed'], 0) + self.assertEqual(pq_stats['pq_executing'], 0) + self.assertEqual(pq_stats['pq_executing_ops'], 0) + self.assertEqual(mdc_stats['strays_created'], total_inodes) + self.assertEqual(mdc_stats['strays_enqueued'], total_inodes) + self.assertEqual(pq_stats['pq_executed'], total_inodes) + + def get_mdc_stat(self, name, mds_id=None): + return self.get_stat("mds_cache", name, mds_id) + + def get_stat(self, subsys, name, mds_id=None): + return self.fs.mds_asok(['perf', 'dump', subsys, name], + mds_id=mds_id)[subsys][name] + + def _wait_for_counter(self, subsys, counter, expect_val, timeout=60, + mds_id=None): + self.wait_until_equal( + lambda: self.get_stat(subsys, counter, mds_id), + expect_val=expect_val, timeout=timeout, + reject_fn=lambda x: x > expect_val + ) + + def test_open_inode(self): + """ + That the case of a dentry unlinked while a client holds an + inode open is handled correctly. + + The inode should be moved into a stray dentry, while the original + dentry and directory should be purged. + + The inode's data should be purged when the client eventually closes + it. 
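+
+ Progress is verified through the mds_cache and purge_queue perf counters
+ and through the client's cap count.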
+ """ + mount_a_client_id = self.mount_a.get_global_id() + + # Write some bytes to a file + size_mb = 8 + + # Hold the file open + p = self.mount_a.open_background("open_file") + self.mount_a.write_n_mb("open_file", size_mb) + open_file_ino = self.mount_a.path_to_ino("open_file") + + self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) + + # Unlink the dentry + self.mount_a.run_shell(["rm", "-f", "open_file"]) + + # Wait to see the stray count increment + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=1, timeout=60, reject_fn=lambda x: x > 1) + + # See that while the stray count has incremented, none have passed + # on to the purge queue + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) + + # See that the client still holds 2 caps + self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) + + # See that the data objects remain in the data pool + self.assertTrue(self.fs.data_objects_present(open_file_ino, size_mb * 1024 * 1024)) + + # Now close the file + self.mount_a.kill_background(p) + + # Wait to see the client cap count decrement + self.wait_until_equal( + lambda: self.get_session(mount_a_client_id)['num_caps'], + expect_val=1, timeout=60, reject_fn=lambda x: x > 2 or x < 1 + ) + # Wait to see the purge counter increment, stray count go to zero + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, timeout=6, reject_fn=lambda x: x > 1 + ) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + # See that the data objects no longer exist + self.assertTrue(self.fs.data_objects_absent(open_file_ino, size_mb * 1024 * 1024)) + + self.await_data_pool_empty() + + def test_reintegration_limit(self): + """ + That the reintegration is not blocked by full directories. + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + time.sleep(10) # for config to reach MDS; async create is fast!! + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.mount_a.run_shell_payload(""" + mkdir a b + for i in `seq 1 50`; do + touch a/"$i" + ln a/"$i" b/"$i" + done + sync -f a b + rm a/* + """) + + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + + + def test_hardlink_reintegration(self): + """ + That removal of primary dentry of hardlinked inode results + in reintegration of inode into the previously-remote dentry, + rather than lingering as a stray indefinitely. + """ + # Write some bytes to file_a + size_mb = 8 + self.mount_a.run_shell(["mkdir", "dir_1"]) + self.mount_a.write_n_mb("dir_1/file_a", size_mb) + ino = self.mount_a.path_to_ino("dir_1/file_a") + + # Create a hardlink named file_b + self.mount_a.run_shell(["mkdir", "dir_2"]) + self.mount_a.run_shell(["ln", "dir_1/file_a", "dir_2/file_b"]) + self.assertEqual(self.mount_a.path_to_ino("dir_2/file_b"), ino) + + # Flush journal + self.fs.mds_asok(['flush', 'journal']) + + # See that backtrace for the file points to the file_a path + pre_unlink_bt = self.fs.read_backtrace(ino) + self.assertEqual(pre_unlink_bt['ancestors'][0]['dname'], "file_a") + + # empty mds cache. 
otherwise mds reintegrates stray when unlink finishes + self.mount_a.umount_wait() + self.fs.mds_asok(['flush', 'journal']) + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + + # Unlink file_a + self.mount_a.run_shell(["rm", "-f", "dir_1/file_a"]) + + # See that a stray was created + self.assertEqual(self.get_mdc_stat("num_strays"), 1) + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + + # Wait, see that data objects are still present (i.e. that the + # stray did not advance to purging given time) + time.sleep(30) + self.assertTrue(self.fs.data_objects_present(ino, size_mb * 1024 * 1024)) + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) + + # See that before reintegration, the inode's backtrace points to a stray dir + self.fs.mds_asok(['flush', 'journal']) + self.assertTrue(self.get_backtrace_path(ino).startswith("stray")) + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + + # Do a metadata operation on the remaining link (mv is heavy handed, but + # others like touch may be satisfied from caps without poking MDS) + self.mount_a.run_shell(["mv", "dir_2/file_b", "dir_2/file_c"]) + + # Stray reintegration should happen as a result of the eval_remote call + # on responding to a client request. + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + # See the reintegration counter increment + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + last_reintegrated = curr_reintegrated + + # Flush the journal + self.fs.mds_asok(['flush', 'journal']) + + # See that the backtrace for the file points to the remaining link's path + post_reint_bt = self.fs.read_backtrace(ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_c") + + # mds should reintegrates stray when unlink finishes + self.mount_a.run_shell(["ln", "dir_2/file_c", "dir_2/file_d"]) + self.mount_a.run_shell(["rm", "-f", "dir_2/file_c"]) + + # Stray reintegration should happen as a result of the notify_stray call + # on completion of unlink + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + # See the reintegration counter increment + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + last_reintegrated = curr_reintegrated + + # Flush the journal + self.fs.mds_asok(['flush', 'journal']) + + # See that the backtrace for the file points to the newest link's path + post_reint_bt = self.fs.read_backtrace(ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_d") + + # Now really delete it + self.mount_a.run_shell(["rm", "-f", "dir_2/file_d"]) + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + self.assert_purge_idle() + self.assertTrue(self.fs.data_objects_absent(ino, size_mb * 1024 * 1024)) + + # We caused the inode to go stray 3 times + self.assertEqual(self.get_mdc_stat("strays_created"), 3) + # We purged it at the last + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1) + + def test_reintegration_via_scrub(self): + """ + That reintegration is triggered via recursive scrub. + """ + + self.mount_a.run_shell_payload(""" + mkdir -p a b + for i in `seq 1 50`; do + touch a/"$i" + ln a/"$i" b/"$i" + done + sync -f . 
+ """) + + self.mount_a.remount() # drop caps/cache + self.fs.rank_tell(["flush", "journal"]) + self.fs.rank_fail() + self.fs.wait_for_daemons() + + # only / in cache, reintegration cannot happen + self.wait_until_equal( + lambda: len(self.fs.rank_tell(["dump", "tree", "/"])), + expect_val=3, + timeout=60 + ) + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.mount_a.run_shell_payload(""" + rm a/* + sync -f . + """) + self.wait_until_equal( + lambda: len(self.fs.rank_tell(["dump", "tree", "/"])), + expect_val=3, + timeout=60 + ) + self.assertEqual(self.get_mdc_stat("num_strays"), 50) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertEqual(last_reintegrated, curr_reintegrated) + + self.fs.rank_tell(["scrub", "start", "/", "recursive,force"]) + + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + # N.B.: reintegrate (rename RPC) may be tried multiple times from different code paths + self.assertGreaterEqual(curr_reintegrated, last_reintegrated+50) + + def test_mv_hardlink_cleanup(self): + """ + That when doing a rename from A to B, and B has hardlinks, + then we make a stray for B which is then reintegrated + into one of his hardlinks. + """ + # Create file_a, file_b, and a hardlink to file_b + size_mb = 8 + self.mount_a.write_n_mb("file_a", size_mb) + file_a_ino = self.mount_a.path_to_ino("file_a") + + self.mount_a.write_n_mb("file_b", size_mb) + file_b_ino = self.mount_a.path_to_ino("file_b") + + self.mount_a.run_shell(["ln", "file_b", "linkto_b"]) + self.assertEqual(self.mount_a.path_to_ino("linkto_b"), file_b_ino) + + # mv file_a file_b + self.mount_a.run_shell(["mv", "file_a", "file_b"]) + + # Stray reintegration should happen as a result of the notify_stray call on + # completion of rename + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + self.assertGreaterEqual(self.get_mdc_stat("strays_reintegrated"), 1) + + # No data objects should have been deleted, as both files still have linkage. 
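+ # file_a's inode now lives at the path "file_b", and file_b's inode survives
+ # through its remaining hard link "linkto_b".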
+ self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) + self.assertTrue(self.fs.data_objects_present(file_b_ino, size_mb * 1024 * 1024)) + + self.fs.mds_asok(['flush', 'journal']) + + post_reint_bt = self.fs.read_backtrace(file_b_ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "linkto_b") + + def _setup_two_ranks(self): + # Set up two MDSs + self.fs.set_max_mds(2) + + # See that we have two active MDSs + self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + active_mds_names = self.fs.get_active_names() + rank_0_id = active_mds_names[0] + rank_1_id = active_mds_names[1] + log.info("Ranks 0 and 1 are {0} and {1}".format( + rank_0_id, rank_1_id)) + + # Get rid of other MDS daemons so that it's easier to know which + # daemons to expect in which ranks after restarts + for unneeded_mds in set(self.mds_cluster.mds_ids) - {rank_0_id, rank_1_id}: + self.mds_cluster.mds_stop(unneeded_mds) + self.mds_cluster.mds_fail(unneeded_mds) + + return rank_0_id, rank_1_id + + def _force_migrate(self, path, rank=1): + """ + :param to_id: MDS id to move it to + :param path: Filesystem path (string) to move + :return: None + """ + self.mount_a.run_shell(["setfattr", "-n", "ceph.dir.pin", "-v", str(rank), path]) + rpath = "/"+path + self._wait_subtrees([(rpath, rank)], rank=rank, path=rpath) + + def _is_stopped(self, rank): + mds_map = self.fs.get_mds_map() + return rank not in [i['rank'] for i in mds_map['info'].values()] + + def test_purge_on_shutdown(self): + """ + That when an MDS rank is shut down, its purge queue is + drained in the process. + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") + self.mds_cluster.mds_fail_restart(rank_1_id) + self.fs.wait_for_daemons() + + file_count = 5 + + self.mount_a.create_n_files("delete_me/file", file_count) + + self._force_migrate("delete_me") + + self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) + self.mount_a.umount_wait() + + # See all the strays go into purge queue + self._wait_for_counter("mds_cache", "strays_created", file_count, mds_id=rank_1_id) + self._wait_for_counter("mds_cache", "strays_enqueued", file_count, mds_id=rank_1_id) + self.assertEqual(self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0) + + # See nothing get purged from the purge queue (yet) + time.sleep(10) + self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) + + # Shut down rank 1 + self.fs.set_max_mds(1) + + # It shouldn't proceed past stopping because its still not allowed + # to purge + time.sleep(10) + self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) + self.assertFalse(self._is_stopped(1)) + + # Permit the daemon to start purging again + self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id), + 'injectargs', + "--mds_max_purge_files 100") + + # It should now proceed through shutdown + self.fs.wait_for_daemons(timeout=120) + + # ...and in the process purge all that data + self.await_data_pool_empty() + + def test_migration_on_shutdown(self): + """ + That when an MDS rank is shut down, any non-purgeable strays + get migrated to another rank. 
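+ The stray is made non-purgeable by keeping a hard link to its inode in a
+ directory pinned to the other rank, so it is migrated rather than purged.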
+ """ + + rank_0_id, rank_1_id = self._setup_two_ranks() + + # Create a non-purgeable stray in a ~mds1 stray directory + # by doing a hard link and deleting the original file + self.mount_a.run_shell_payload(""" +mkdir dir_1 dir_2 +touch dir_1/original +ln dir_1/original dir_2/linkto +""") + + self._force_migrate("dir_1") + self._force_migrate("dir_2", rank=0) + + # empty mds cache. otherwise mds reintegrates stray when unlink finishes + self.mount_a.umount_wait() + self.fs.mds_asok(['flush', 'journal'], rank_1_id) + self.fs.mds_asok(['cache', 'drop'], rank_1_id) + + self.mount_a.mount_wait() + self.mount_a.run_shell(["rm", "-f", "dir_1/original"]) + self.mount_a.umount_wait() + + self._wait_for_counter("mds_cache", "strays_created", 1, + mds_id=rank_1_id) + + # Shut down rank 1 + self.fs.set_max_mds(1) + self.fs.wait_for_daemons(timeout=120) + + # See that the stray counter on rank 0 has incremented + self.assertEqual(self.get_mdc_stat("strays_created", rank_0_id), 1) + + def test_migrate_unlinked_dir(self): + """ + Reproduce https://tracker.ceph.com/issues/53597 + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.mount_a.run_shell_payload(""" +mkdir pin +touch pin/placeholder +""") + + self._force_migrate("pin") + + # Hold the dir open so it cannot be purged + p = self.mount_a.open_dir_background("pin/to-be-unlinked") + + # Unlink the dentry + self.mount_a.run_shell(["rmdir", "pin/to-be-unlinked"]) + + # Wait to see the stray count increment + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays", mds_id=rank_1_id), + expect_val=1, timeout=60, reject_fn=lambda x: x > 1) + # but not purged + self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_1_id), 1) + self.assertEqual(self.get_mdc_stat("strays_enqueued", mds_id=rank_1_id), 0) + + # Test loading unlinked dir into cache + self.fs.mds_asok(['flush', 'journal'], rank_1_id) + self.fs.mds_asok(['cache', 'drop'], rank_1_id) + + # Shut down rank 1 + self.fs.set_max_mds(1) + self.fs.wait_for_daemons(timeout=120) + # Now the stray should be migrated to rank 0 + # self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_0_id), 1) + # https://github.com/ceph/ceph/pull/44335#issuecomment-1125940158 + + self.mount_a.kill_background(p) + + def assert_backtrace(self, ino, expected_path): + """ + Assert that the backtrace in the data pool for an inode matches + an expected /foo/bar path. + """ + expected_elements = expected_path.strip("/").split("/") + bt = self.fs.read_backtrace(ino) + actual_elements = list(reversed([dn['dname'] for dn in bt['ancestors']])) + self.assertListEqual(expected_elements, actual_elements) + + def get_backtrace_path(self, ino): + bt = self.fs.read_backtrace(ino) + elements = reversed([dn['dname'] for dn in bt['ancestors']]) + return "/".join(elements) + + def assert_purge_idle(self): + """ + Assert that the MDS perf counters indicate no strays exist and + no ongoing purge activity. Sanity check for when PurgeQueue should + be idle. + """ + mdc_stats = self.fs.mds_asok(['perf', 'dump', "mds_cache"])['mds_cache'] + pq_stats = self.fs.mds_asok(['perf', 'dump', "purge_queue"])['purge_queue'] + self.assertEqual(mdc_stats["num_strays"], 0) + self.assertEqual(mdc_stats["num_strays_delayed"], 0) + self.assertEqual(pq_stats["pq_executing"], 0) + self.assertEqual(pq_stats["pq_executing_ops"], 0) + + def test_mv_cleanup(self): + """ + That when doing a rename from A to B, and B has no hardlinks, + then we make a stray for B and purge him. 
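+ B's data objects should disappear, A's data objects should remain, and
+ A's backtrace should be rewritten to point at the path "file_b".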
+ """ + # Create file_a and file_b, write some to both + size_mb = 8 + self.mount_a.write_n_mb("file_a", size_mb) + file_a_ino = self.mount_a.path_to_ino("file_a") + self.mount_a.write_n_mb("file_b", size_mb) + file_b_ino = self.mount_a.path_to_ino("file_b") + + self.fs.mds_asok(['flush', 'journal']) + self.assert_backtrace(file_a_ino, "file_a") + self.assert_backtrace(file_b_ino, "file_b") + + # mv file_a file_b + self.mount_a.run_shell(['mv', 'file_a', 'file_b']) + + # See that stray counter increments + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + # Wait for purge counter to increment + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + self.assert_purge_idle() + + # file_b should have been purged + self.assertTrue(self.fs.data_objects_absent(file_b_ino, size_mb * 1024 * 1024)) + + # Backtrace should have updated from file_a to file_b + self.fs.mds_asok(['flush', 'journal']) + self.assert_backtrace(file_a_ino, "file_b") + + # file_a's data should still exist + self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) + + def _pool_df(self, pool_name): + """ + Return a dict like + { + "kb_used": 0, + "bytes_used": 0, + "max_avail": 19630292406, + "objects": 0 + } + + :param pool_name: Which pool (must exist) + """ + out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty") + for p in json.loads(out)['pools']: + if p['name'] == pool_name: + return p['stats'] + + raise RuntimeError("Pool '{0}' not found".format(pool_name)) + + def await_data_pool_empty(self): + self.wait_until_true( + lambda: self._pool_df( + self.fs.get_data_pool_name() + )['objects'] == 0, + timeout=60) + + def test_snapshot_remove(self): + """ + That removal of a snapshot that references a now-unlinked file results + in purging on the stray for the file. + """ + # Enable snapshots + self.fs.set_allow_new_snaps(True) + + # Create a dir with a file in it + size_mb = 8 + self.mount_a.run_shell(["mkdir", "snapdir"]) + self.mount_a.run_shell(["mkdir", "snapdir/subdir"]) + self.mount_a.write_test_pattern("snapdir/subdir/file_a", size_mb * 1024 * 1024) + file_a_ino = self.mount_a.path_to_ino("snapdir/subdir/file_a") + + # Snapshot the dir + self.mount_a.run_shell(["mkdir", "snapdir/.snap/snap1"]) + + # Cause the head revision to deviate from the snapshot + self.mount_a.write_n_mb("snapdir/subdir/file_a", size_mb) + + # Flush the journal so that backtraces, dirfrag objects will actually be written + self.fs.mds_asok(["flush", "journal"]) + + # Unlink the file + self.mount_a.run_shell(["rm", "-f", "snapdir/subdir/file_a"]) + self.mount_a.run_shell(["rmdir", "snapdir/subdir"]) + + # Unmount the client because when I come back to check the data is still + # in the file I don't want to just see what's in the page cache. 
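+ # (The unlinked file is still referenced by the snapshot "snap1", so its data
+ # must survive until the snapshot itself is removed later in the test.)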
+ self.mount_a.umount_wait() + + self.assertEqual(self.get_mdc_stat("strays_created"), 2) + + # FIXME: at this stage we see a purge and the stray count drops to + # zero, but there's actually still a stray, so at the very + # least the StrayManager stats code is slightly off + + self.mount_a.mount_wait() + + # See that the data from the snapshotted revision of the file is still present + # and correct + self.mount_a.validate_test_pattern("snapdir/.snap/snap1/subdir/file_a", size_mb * 1024 * 1024) + + # Remove the snapshot + self.mount_a.run_shell(["rmdir", "snapdir/.snap/snap1"]) + + # Purging file_a doesn't happen until after we've flushed the journal, because + # it is referenced by the snapshotted subdir, and the snapshot isn't really + # gone until the journal references to it are gone + self.fs.mds_asok(["flush", "journal"]) + + # Wait for purging to complete, which requires the OSDMap to propagate to the OSDs. + # See also: http://tracker.ceph.com/issues/20072 + self.wait_until_true( + lambda: self.fs.data_objects_absent(file_a_ino, size_mb * 1024 * 1024), + timeout=60 + ) + + # See that a purge happens now + self._wait_for_counter("mds_cache", "strays_enqueued", 2) + self._wait_for_counter("purge_queue", "pq_executed", 2) + + self.await_data_pool_empty() + + def test_fancy_layout(self): + """ + purge stray file with fancy layout + """ + + file_name = "fancy_layout_file" + self.mount_a.run_shell(["touch", file_name]) + + file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608" + self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout) + + # 35MB requires 7 objects + size_mb = 35 + self.mount_a.write_n_mb(file_name, size_mb) + + self.mount_a.run_shell(["rm", "-f", file_name]) + self.fs.mds_asok(["flush", "journal"]) + + # can't use self.fs.data_objects_absent here, it does not support fancy layout + self.await_data_pool_empty() + + def test_dirfrag_limit(self): + """ + That the directory fragment size cannot exceed mds_bal_fragment_size_max (using a limit of 50 in all configurations). + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + time.sleep(10) # for config to reach MDS; async create is fast!! + + try: + self.mount_a.create_n_files("subdir/file", LOW_LIMIT+1, finaldirsync=True) + except CommandFailedError: + pass # ENOSPC + else: + self.fail("fragment size exceeded") + + + def test_dirfrag_limit_fragmented(self): + """ + That fragmentation (forced) will allow more entries to be created. + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + self.config_set('mds', 'mds_bal_merge_size', 1) # disable merging + time.sleep(10) # for config to reach MDS; async create is fast!! + + # Test that we can go beyond the limit if we fragment the directory + self.mount_a.create_n_files("subdir/file", LOW_LIMIT, finaldirsync=True) + self.mount_a.umount_wait() # release client caps + + # Ensure that subdir is fragmented + self.fs.rank_asok(["dirfrag", "split", "/subdir", "0/0", "1"]) + self.fs.rank_asok(["flush", "journal"]) + + # Create 50% more files than the current fragment limit + self.mount_a.mount_wait() + self.mount_a.create_n_files("subdir/file", (LOW_LIMIT*3)//2, finaldirsync=True) + + def test_dirfrag_limit_strays(self): + """ + That unlinking fails when the stray directory fragment becomes too + large and that unlinking may continue once those strays are purged. + """ + + LOW_LIMIT = 10 + # N.B. 
this test is inherently racy because stray removal may be faster + # than slow(er) file creation. + self.config_set('mds', 'mds_bal_fragment_size_max', LOW_LIMIT) + time.sleep(10) # for config to reach MDS; async create is fast!! + + # Now test the stray directory size is limited and recovers + strays_before = self.get_mdc_stat("strays_created") + try: + # 10 stray directories: expect collisions + self.mount_a.create_n_files("subdir/file", LOW_LIMIT*10, finaldirsync=True, unlink=True) + except CommandFailedError: + pass # ENOSPC + else: + self.fail("fragment size exceeded") + strays_after = self.get_mdc_stat("strays_created") + self.assertGreaterEqual(strays_after-strays_before, LOW_LIMIT) + + self._wait_for_counter("mds_cache", "strays_enqueued", strays_after) + self._wait_for_counter("purge_queue", "pq_executed", strays_after) + + # verify new files can be created and unlinked + self.mount_a.create_n_files("subdir/file", LOW_LIMIT, dirsync=True, unlink=True) + + def test_purge_queue_upgrade(self): + """ + That when starting on a system with no purge queue in the metadata + pool, we silently create one. + :return: + """ + + self.mds_cluster.mds_stop() + self.mds_cluster.mds_fail() + self.fs.radosm(["rm", "500.00000000"]) + self.mds_cluster.mds_restart() + self.fs.wait_for_daemons() + + def test_replicated_delete_speed(self): + """ + That deletions of replicated metadata are not pathologically slow + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") + self.mds_cluster.mds_fail_restart(rank_1_id) + self.fs.wait_for_daemons() + + file_count = 10 + + self.mount_a.create_n_files("delete_me/file", file_count) + + self._force_migrate("delete_me") + + begin = datetime.datetime.now() + self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) + end = datetime.datetime.now() + + # What we're really checking here is that we are completing client + # operations immediately rather than delaying until the next tick. 
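+ # With file_count = 10 the allowed budget is 10 * mds_tick_interval * 0.25;
+ # for example, with a 5 second tick that is 12.5 seconds.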
+ tick_period = float(self.fs.get_config("mds_tick_interval", + service_type="mds")) + + duration = (end - begin).total_seconds() + self.assertLess(duration, (file_count * tick_period) * 0.25) diff --git a/qa/tasks/cephfs/test_subvolume.py b/qa/tasks/cephfs/test_subvolume.py new file mode 100644 index 000000000..1ebb137dd --- /dev/null +++ b/qa/tasks/cephfs/test_subvolume.py @@ -0,0 +1,170 @@ +import logging + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + + +class TestSubvolume(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def setUp(self): + super().setUp() + self.setup_test() + + def tearDown(self): + # clean up + self.cleanup_test() + super().tearDown() + + def setup_test(self): + self.mount_a.run_shell(['mkdir', 'group']) + self.mount_a.run_shell(['mkdir', 'group/subvol1']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol1']) + self.mount_a.run_shell(['mv', 'group/subvol1', 'group/subvol2']) + + def cleanup_test(self): + self.mount_a.run_shell(['rm', '-rf', 'group']) + + def test_subvolume_move_out_file(self): + """ + To verify that file can't be moved out of subvolume + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # file can't be moved out of a subvolume + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['rename', 'group/subvol2/file1', + 'group/file1', 'group/subvol2/file1']) + + + def test_subvolume_move_in_file(self): + """ + To verify that file can't be moved into subvolume + """ + # file can't be moved into a subvolume + self.mount_a.run_shell(['touch', 'group/file2']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['rename', 'group/file2', + 'group/subvol2/file2', 'group/file2']) + + def test_subvolume_hardlink_to_outside(self): + """ + To verify that file can't be hardlinked to outside subvolume + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create hard link within subvolume + self.mount_a.run_shell(['ln', + 'group/subvol2/file1', 'group/subvol2/file1_']) + + # hard link can't be created out of subvolume + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['ln', + 'group/subvol2/file1', 'group/file1_']) + + def test_subvolume_hardlink_to_inside(self): + """ + To verify that file can't be hardlinked to inside subvolume + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create hard link within subvolume + self.mount_a.run_shell(['ln', + 'group/subvol2/file1', 'group/subvol2/file1_']) + + # hard link can't be created inside subvolume + self.mount_a.run_shell(['touch', 'group/file2']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['ln', + 'group/file2', 'group/subvol2/file2_']) + + def test_subvolume_snapshot_inside_subvolume_subdir(self): + """ + To verify that snapshot can't be taken for a subvolume subdir + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create snapshot at subvolume root + self.mount_a.run_shell(['mkdir', 'group/subvol2/.snap/s1']) + + # can't create snapshot in a descendent dir of subvolume + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s2']) + + # clean up + self.mount_a.run_shell(['rmdir', 'group/subvol2/.snap/s1']) + + def test_subvolume_file_move_across_subvolumes(self): + """ + To verify that file can't be 
moved across subvolumes + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create another subvol + self.mount_a.run_shell(['mkdir', 'group/subvol3']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol3']) + + # can't move file across subvolumes + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['rename', 'group/subvol2/file1', + 'group/subvol3/file1', + 'group/subvol2/file1']) + + def test_subvolume_hardlink_across_subvolumes(self): + """ + To verify that hardlink can't be created across subvolumes + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create another subvol + self.mount_a.run_shell(['mkdir', 'group/subvol3']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol3']) + + # can't create hard link across subvolumes + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['ln', 'group/subvol2/file1', + 'group/subvol3/file1']) + + def test_subvolume_create_subvolume_inside_subvolume(self): + """ + To verify that subvolume can't be created inside a subvolume + """ + # can't create subvolume inside a subvolume + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol2/dir']) + + def test_subvolume_create_snapshot_inside_new_subvolume_parent(self): + """ + To verify that subvolume can't be created inside a new subvolume parent + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # clear subvolume flag + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '0', 'group/subvol2']) + + # create a snap + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir']) + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s2']) + + # override subdir subvolume with parent subvolume + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol2/dir']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol2']) + + # can't create a snap in a subdir of a subvol parent + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s3']) + + # clean up + self.mount_a.run_shell(['rmdir', 'group/subvol2/dir/.snap/s2']) diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py new file mode 100644 index 000000000..2ecfeb327 --- /dev/null +++ b/qa/tasks/cephfs/test_volumes.py @@ -0,0 +1,7946 @@ +import os +import json +import time +import errno +import random +import logging +import collections +import uuid +import unittest +from hashlib import md5 +from textwrap import dedent +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestVolumesHelper(CephFSTestCase): + """Helper class for testing FS volume, subvolume group and subvolume operations.""" + TEST_VOLUME_PREFIX = "volume" + TEST_SUBVOLUME_PREFIX="subvolume" + TEST_GROUP_PREFIX="group" + TEST_SNAPSHOT_PREFIX="snapshot" + TEST_CLONE_PREFIX="clone" + TEST_FILE_NAME_PREFIX="subvolume_file" + + # for filling subvolume with data + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 2 + + # io defaults + DEFAULT_FILE_SIZE = 1 # MB + DEFAULT_NUMBER_OF_FILES = 1024 + + def _fs_cmd(self, *args): + return 
self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + + def _raw_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def __check_clone_state(self, state, clone, clone_group=None, timo=120): + check = 0 + args = ["clone", "status", self.volname, clone] + if clone_group: + args.append(clone_group) + args = tuple(args) + while check < timo: + result = json.loads(self._fs_cmd(*args)) + if result["status"]["state"] == state: + break + check += 1 + time.sleep(1) + self.assertTrue(check < timo) + + def _get_clone_status(self, clone, clone_group=None): + args = ["clone", "status", self.volname, clone] + if clone_group: + args.append(clone_group) + args = tuple(args) + result = json.loads(self._fs_cmd(*args)) + return result + + def _wait_for_clone_to_complete(self, clone, clone_group=None, timo=120): + self.__check_clone_state("complete", clone, clone_group, timo) + + def _wait_for_clone_to_fail(self, clone, clone_group=None, timo=120): + self.__check_clone_state("failed", clone, clone_group, timo) + + def _wait_for_clone_to_be_in_progress(self, clone, clone_group=None, timo=120): + self.__check_clone_state("in-progress", clone, clone_group, timo) + + def _check_clone_canceled(self, clone, clone_group=None): + self.__check_clone_state("canceled", clone, clone_group, timo=1) + + def _get_subvolume_snapshot_path(self, subvolume, snapshot, source_group, subvol_path, source_version): + if source_version == 2: + # v2 + if subvol_path is not None: + (base_path, uuid_str) = os.path.split(subvol_path) + else: + (base_path, uuid_str) = os.path.split(self._get_subvolume_path(self.volname, subvolume, group_name=source_group)) + return os.path.join(base_path, ".snap", snapshot, uuid_str) + + # v1 + base_path = self._get_subvolume_path(self.volname, subvolume, group_name=source_group) + return os.path.join(base_path, ".snap", snapshot) + + def _verify_clone_attrs(self, source_path, clone_path): + path1 = source_path + path2 = clone_path + + p = self.mount_a.run_shell(["find", path1]) + paths = p.stdout.getvalue().strip().split() + + # for each entry in source and clone (sink) verify certain inode attributes: + # inode type, mode, ownership, [am]time. + for source_path in paths: + sink_entry = source_path[len(path1)+1:] + sink_path = os.path.join(path2, sink_entry) + + # mode+type + sval = int(self.mount_a.run_shell(['stat', '-c' '%f', source_path]).stdout.getvalue().strip(), 16) + cval = int(self.mount_a.run_shell(['stat', '-c' '%f', sink_path]).stdout.getvalue().strip(), 16) + self.assertEqual(sval, cval) + + # ownership + sval = int(self.mount_a.run_shell(['stat', '-c' '%u', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%u', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + sval = int(self.mount_a.run_shell(['stat', '-c' '%g', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%g', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + # inode timestamps + # do not check access as kclient will generally not update this like ceph-fuse will. 
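+ # '%Y' below is the inode modification time (mtime); atime is deliberately
+ # not compared.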
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%Y', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%Y', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + def _verify_clone_root(self, source_path, clone_path, clone, clone_group, clone_pool): + # verifies following clone root attrs quota, data_pool and pool_namespace + # remaining attributes of clone root are validated in _verify_clone_attrs + + clone_info = json.loads(self._get_subvolume_info(self.volname, clone, clone_group)) + + # verify quota is inherited from source snapshot + src_quota = self.mount_a.getfattr(source_path, "ceph.quota.max_bytes") + # FIXME: kclient fails to get this quota value: https://tracker.ceph.com/issues/48075 + if isinstance(self.mount_a, FuseMount): + self.assertEqual(clone_info["bytes_quota"], "infinite" if src_quota is None else int(src_quota)) + + if clone_pool: + # verify pool is set as per request + self.assertEqual(clone_info["data_pool"], clone_pool) + else: + # verify pool and pool namespace are inherited from snapshot + self.assertEqual(clone_info["data_pool"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool")) + self.assertEqual(clone_info["pool_namespace"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool_namespace")) + + def _verify_clone(self, subvolume, snapshot, clone, + source_group=None, clone_group=None, clone_pool=None, + subvol_path=None, source_version=2, timo=120): + # pass in subvol_path (subvolume path when snapshot was taken) when subvolume is removed + # but snapshots are retained for clone verification + path1 = self._get_subvolume_snapshot_path(subvolume, snapshot, source_group, subvol_path, source_version) + path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group) + + check = 0 + # TODO: currently snapshot rentries are not stable if snapshot source entries + # are removed, https://tracker.ceph.com/issues/46747 + while check < timo and subvol_path is None: + val1 = int(self.mount_a.getfattr(path1, "ceph.dir.rentries")) + val2 = int(self.mount_a.getfattr(path2, "ceph.dir.rentries")) + if val1 == val2: + break + check += 1 + time.sleep(1) + self.assertTrue(check < timo) + + self._verify_clone_root(path1, path2, clone, clone_group, clone_pool) + self._verify_clone_attrs(path1, path2) + + def _generate_random_volume_name(self, count=1): + n = self.volume_start + volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.volume_start += count + return volumes[0] if count == 1 else volumes + + def _generate_random_subvolume_name(self, count=1): + n = self.subvolume_start + subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.subvolume_start += count + return subvolumes[0] if count == 1 else subvolumes + + def _generate_random_group_name(self, count=1): + n = self.group_start + groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)] + self.group_start += count + return groups[0] if count == 1 else groups + + def _generate_random_snapshot_name(self, count=1): + n = self.snapshot_start + snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)] + self.snapshot_start += count + return snaps[0] if count == 1 else snaps + + def _generate_random_clone_name(self, count=1): + n = self.clone_start + clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)] + self.clone_start += count + return clones[0] if count == 1 else clones + + def 
_enable_multi_fs(self): + self._fs_cmd("flag", "set", "enable_multiple", "true", "--yes-i-really-mean-it") + + def _create_or_reuse_test_volume(self): + result = json.loads(self._fs_cmd("volume", "ls")) + if len(result) == 0: + self.vol_created = True + self.volname = self._generate_random_volume_name() + self._fs_cmd("volume", "create", self.volname) + else: + self.volname = result[0]['name'] + + def _get_volume_info(self, vol_name, human_readable=False): + if human_readable: + args = ["volume", "info", vol_name, human_readable] + else: + args = ["volume", "info", vol_name] + args = tuple(args) + vol_md = self._fs_cmd(*args) + return vol_md + + def _get_subvolume_group_path(self, vol_name, group_name): + args = ("subvolumegroup", "getpath", vol_name, group_name) + path = self._fs_cmd(*args) + # remove the leading '/', and trailing whitespaces + return path[1:].rstrip() + + def _get_subvolume_group_info(self, vol_name, group_name): + args = ["subvolumegroup", "info", vol_name, group_name] + args = tuple(args) + group_md = self._fs_cmd(*args) + return group_md + + def _get_subvolume_path(self, vol_name, subvol_name, group_name=None): + args = ["subvolume", "getpath", vol_name, subvol_name] + if group_name: + args.append(group_name) + args = tuple(args) + path = self._fs_cmd(*args) + # remove the leading '/', and trailing whitespaces + return path[1:].rstrip() + + def _get_subvolume_info(self, vol_name, subvol_name, group_name=None): + args = ["subvolume", "info", vol_name, subvol_name] + if group_name: + args.append(group_name) + args = tuple(args) + subvol_md = self._fs_cmd(*args) + return subvol_md + + def _get_subvolume_snapshot_info(self, vol_name, subvol_name, snapname, group_name=None): + args = ["subvolume", "snapshot", "info", vol_name, subvol_name, snapname] + if group_name: + args.append(group_name) + args = tuple(args) + snap_md = self._fs_cmd(*args) + return snap_md + + def _delete_test_volume(self): + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + def _do_subvolume_pool_and_namespace_update(self, subvolume, pool=None, pool_namespace=None, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + if pool is not None: + self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool', pool, sudo=True) + + if pool_namespace is not None: + self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool_namespace', pool_namespace, sudo=True) + + def _do_subvolume_attr_update(self, subvolume, uid, gid, mode, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + # mode + self.mount_a.run_shell(['sudo', 'chmod', mode, subvolpath], omit_sudo=False) + + # ownership + self.mount_a.run_shell(['sudo', 'chown', uid, subvolpath], omit_sudo=False) + self.mount_a.run_shell(['sudo', 'chgrp', gid, subvolpath], omit_sudo=False) + + def _do_subvolume_io(self, subvolume, subvolume_group=None, create_dir=None, + number_of_files=DEFAULT_NUMBER_OF_FILES, file_size=DEFAULT_FILE_SIZE): + # get subvolume path for IO + args = ["subvolume", "getpath", self.volname, subvolume] + if subvolume_group: + args.append(subvolume_group) + args = tuple(args) + subvolpath = self._fs_cmd(*args) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline + + io_path = subvolpath + if create_dir: + io_path = os.path.join(subvolpath, create_dir) + self.mount_a.run_shell_payload(f"mkdir -p {io_path}") + + log.debug("filling 
subvolume {0} with {1} files each {2}MB size under directory {3}".format(subvolume, number_of_files, file_size, io_path)) + for i in range(number_of_files): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + self.mount_a.write_n_mb(os.path.join(io_path, filename), file_size) + + def _do_subvolume_io_mixed(self, subvolume, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + reg_file = "regfile.0" + dir_path = os.path.join(subvolpath, "dir.0") + sym_path1 = os.path.join(subvolpath, "sym.0") + # this symlink's ownership would be changed + sym_path2 = os.path.join(dir_path, "sym.0") + + self.mount_a.run_shell(["mkdir", dir_path]) + self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path1]) + self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path2]) + # flip ownership to nobody. assumption: nobody's id is 65534 + self.mount_a.run_shell(["sudo", "chown", "-h", "65534:65534", sym_path2], omit_sudo=False) + + def _wait_for_trash_empty(self, timeout=60): + # XXX: construct the trash dir path (note that there is no mgr + # [sub]volume interface for this). + trashdir = os.path.join("./", "volumes", "_deleting") + self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout) + + def _wait_for_subvol_trash_empty(self, subvol, group="_nogroup", timeout=30): + trashdir = os.path.join("./", "volumes", group, subvol, ".trash") + try: + self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + pass + else: + raise + + def _assert_meta_location_and_version(self, vol_name, subvol_name, subvol_group=None, version=2, legacy=False): + if legacy: + subvol_path = self._get_subvolume_path(vol_name, subvol_name, group_name=subvol_group) + m = md5() + m.update(("/"+subvol_path).encode('utf-8')) + meta_filename = "{0}.meta".format(m.digest().hex()) + metapath = os.path.join(".", "volumes", "_legacy", meta_filename) + else: + group = subvol_group if subvol_group is not None else '_nogroup' + metapath = os.path.join(".", "volumes", group, subvol_name, ".meta") + + out = self.mount_a.run_shell(['sudo', 'cat', metapath], omit_sudo=False) + lines = out.stdout.getvalue().strip().split('\n') + sv_version = -1 + for line in lines: + if line == "version = " + str(version): + sv_version = version + break + self.assertEqual(sv_version, version, "version expected was '{0}' but got '{1}' from meta file at '{2}'".format( + version, sv_version, metapath)) + + def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=True, subvol_type='subvolume', state='complete'): + group = subvol_group if subvol_group is not None else '_nogroup' + basepath = os.path.join("volumes", group, subvol_name) + uuid_str = str(uuid.uuid4()) + createpath = os.path.join(basepath, uuid_str) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # create a v1 snapshot, to prevent auto upgrades + if has_snapshot: + snappath = os.path.join(createpath, ".snap", "fake") + self.mount_a.run_shell(['sudo', 'mkdir', '-p', snappath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # create a v1 .meta file + meta_contents = "[GLOBAL]\nversion = 1\ntype = {0}\npath = {1}\nstate = {2}\n".format(subvol_type, "/" + createpath, state) + if state == 'pending': + # add a fake clone 
source + meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n' + meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath1, meta_contents, sudo=True) + return createpath + + def _update_fake_trash(self, subvol_name, subvol_group=None, trash_name='fake', create=True): + group = subvol_group if subvol_group is not None else '_nogroup' + trashpath = os.path.join("volumes", group, subvol_name, '.trash', trash_name) + if create: + self.mount_a.run_shell(['sudo', 'mkdir', '-p', trashpath], omit_sudo=False) + else: + self.mount_a.run_shell(['sudo', 'rmdir', trashpath], omit_sudo=False) + + def _configure_guest_auth(self, guest_mount, authid, key): + """ + Set up auth credentials for a guest client. + """ + # Create keyring file for the guest client. + keyring_txt = dedent(""" + [client.{authid}] + key = {key} + + """.format(authid=authid,key=key)) + + guest_mount.client_id = authid + guest_mount.client_remote.write_file(guest_mount.get_keyring_path(), + keyring_txt, sudo=True) + # Add a guest client section to the ceph config file. + self.config_set("client.{0}".format(authid), "debug client", 20) + self.config_set("client.{0}".format(authid), "debug objecter", 20) + self.set_conf("client.{0}".format(authid), + "keyring", guest_mount.get_keyring_path()) + + def _auth_metadata_get(self, filedata): + """ + Return a deserialized JSON object, or None + """ + try: + data = json.loads(filedata) + except json.decoder.JSONDecodeError: + data = None + return data + + def setUp(self): + super(TestVolumesHelper, self).setUp() + self.volname = None + self.vol_created = False + self._enable_multi_fs() + self._create_or_reuse_test_volume() + self.config_set('mon', 'mon_allow_pool_delete', True) + self.volume_start = random.randint(1, (1<<20)) + self.subvolume_start = random.randint(1, (1<<20)) + self.group_start = random.randint(1, (1<<20)) + self.snapshot_start = random.randint(1, (1<<20)) + self.clone_start = random.randint(1, (1<<20)) + + def tearDown(self): + if self.vol_created: + self._delete_test_volume() + super(TestVolumesHelper, self).tearDown() + + +class TestVolumes(TestVolumesHelper): + """Tests for FS volume operations.""" + def test_volume_create(self): + """ + That the volume can be created and then cleans up + """ + volname = self._generate_random_volume_name() + self._fs_cmd("volume", "create", volname) + volumels = json.loads(self._fs_cmd("volume", "ls")) + + if not (volname in ([volume['name'] for volume in volumels])): + raise RuntimeError("Error creating volume '{0}'".format(volname)) + + # check that the pools were created with the correct config + pool_details = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json")) + pool_flags = {} + for pool in pool_details: + pool_flags[pool["pool_id"]] = pool["flags_names"].split(",") + + volume_details = json.loads(self._fs_cmd("get", volname, "--format=json")) + for data_pool_id in volume_details['mdsmap']['data_pools']: + self.assertIn("bulk", pool_flags[data_pool_id]) + meta_pool_id = volume_details['mdsmap']['metadata_pool'] + self.assertNotIn("bulk", pool_flags[meta_pool_id]) + + # clean up + self._fs_cmd("volume", "rm", volname, "--yes-i-really-mean-it") + + def test_volume_ls(self): + """ + That the existing and the newly created volumes can be listed and + finally cleans up. 
+ """ + vls = json.loads(self._fs_cmd("volume", "ls")) + volumes = [volume['name'] for volume in vls] + + #create new volumes and add it to the existing list of volumes + volumenames = self._generate_random_volume_name(2) + for volumename in volumenames: + self._fs_cmd("volume", "create", volumename) + volumes.extend(volumenames) + + # list volumes + try: + volumels = json.loads(self._fs_cmd('volume', 'ls')) + if len(volumels) == 0: + raise RuntimeError("Expected the 'fs volume ls' command to list the created volumes.") + else: + volnames = [volume['name'] for volume in volumels] + if collections.Counter(volnames) != collections.Counter(volumes): + raise RuntimeError("Error creating or listing volumes") + finally: + # clean up + for volume in volumenames: + self._fs_cmd("volume", "rm", volume, "--yes-i-really-mean-it") + + def test_volume_rm(self): + """ + That the volume can only be removed when --yes-i-really-mean-it is used + and verify that the deleted volume is not listed anymore. + """ + for m in self.mounts: + m.umount_wait() + try: + self._fs_cmd("volume", "rm", self.volname) + except CommandFailedError as ce: + if ce.exitstatus != errno.EPERM: + raise RuntimeError("expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + else: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if it's gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + if (self.volname in [volume['name'] for volume in volumes]): + raise RuntimeError("Expected the 'fs volume rm' command to succeed. " + "The volume {0} not removed.".format(self.volname)) + else: + raise RuntimeError("expected the 'fs volume rm' command to fail.") + + def test_volume_rm_arbitrary_pool_removal(self): + """ + That the arbitrary pool added to the volume out of band is removed + successfully on volume removal. + """ + for m in self.mounts: + m.umount_wait() + new_pool = "new_pool" + # add arbitrary data pool + self.fs.add_data_pool(new_pool) + vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty")) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames) + + #check if osd pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty")) + for pool in vol_status["pools"]: + self.assertNotIn(pool["name"], pools) + + def test_volume_rm_when_mon_delete_pool_false(self): + """ + That the volume can only be removed when mon_allowd_pool_delete is set + to true and verify that the pools are removed after volume deletion. 
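+ The test flips mon_allow_pool_delete back to true partway through so that
+ the final 'fs volume rm' can succeed.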
+ """ + for m in self.mounts: + m.umount_wait() + self.config_set('mon', 'mon_allow_pool_delete', False) + try: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty")) + self.config_set('mon', 'mon_allow_pool_delete', True) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames, + "volume {0} exists after removal".format(self.volname)) + #check if pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty")) + for pool in vol_status["pools"]: + self.assertNotIn(pool["name"], pools, + "pool {0} exists after volume removal".format(pool["name"])) + + def test_volume_rename(self): + """ + That volume, its file system and pools, can be renamed. + """ + for m in self.mounts: + m.umount_wait() + oldvolname = self.volname + newvolname = self._generate_random_volume_name() + new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta" + self._fs_cmd("volume", "rename", oldvolname, newvolname, + "--yes-i-really-mean-it") + volumels = json.loads(self._fs_cmd('volume', 'ls')) + volnames = [volume['name'] for volume in volumels] + # volume name changed + self.assertIn(newvolname, volnames) + self.assertNotIn(oldvolname, volnames) + # pool names changed + self.fs.get_pool_names(refresh=True) + self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name()) + self.assertEqual(new_data_pool, self.fs.get_data_pool_name()) + + def test_volume_rename_idempotency(self): + """ + That volume rename is idempotent. + """ + for m in self.mounts: + m.umount_wait() + oldvolname = self.volname + newvolname = self._generate_random_volume_name() + new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta" + self._fs_cmd("volume", "rename", oldvolname, newvolname, + "--yes-i-really-mean-it") + self._fs_cmd("volume", "rename", oldvolname, newvolname, + "--yes-i-really-mean-it") + volumels = json.loads(self._fs_cmd('volume', 'ls')) + volnames = [volume['name'] for volume in volumels] + self.assertIn(newvolname, volnames) + self.assertNotIn(oldvolname, volnames) + self.fs.get_pool_names(refresh=True) + self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name()) + self.assertEqual(new_data_pool, self.fs.get_data_pool_name()) + + def test_volume_rename_fails_without_confirmation_flag(self): + """ + That renaming volume fails without --yes-i-really-mean-it flag. + """ + newvolname = self._generate_random_volume_name() + try: + self._fs_cmd("volume", "rename", self.volname, newvolname) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "invalid error code on renaming a FS volume without the " + "'--yes-i-really-mean-it' flag") + else: + self.fail("expected renaming of FS volume to fail without the " + "'--yes-i-really-mean-it' flag") + + def test_volume_rename_for_more_than_one_data_pool(self): + """ + That renaming a volume with more than one data pool does not change + the name of the data pools. 
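+ Only the metadata pool is expected to be renamed (to the
+ cephfs.<newvolname>.meta convention); the data pool names are left unchanged.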
+ """ + for m in self.mounts: + m.umount_wait() + self.fs.add_data_pool('another-data-pool') + oldvolname = self.volname + newvolname = self._generate_random_volume_name() + self.fs.get_pool_names(refresh=True) + orig_data_pool_names = list(self.fs.data_pools.values()) + new_metadata_pool = f"cephfs.{newvolname}.meta" + self._fs_cmd("volume", "rename", self.volname, newvolname, + "--yes-i-really-mean-it") + volumels = json.loads(self._fs_cmd('volume', 'ls')) + volnames = [volume['name'] for volume in volumels] + # volume name changed + self.assertIn(newvolname, volnames) + self.assertNotIn(oldvolname, volnames) + self.fs.get_pool_names(refresh=True) + # metadata pool name changed + self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name()) + # data pool names unchanged + self.assertCountEqual(orig_data_pool_names, list(self.fs.data_pools.values())) + + def test_volume_info(self): + """ + Tests the 'fs volume info' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertEqual(vol_info["used_size"], 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_pending_subvol_deletions(self): + """ + Tests the pending_subvolume_deletions in 'fs volume info' command + """ + subvolname = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--mode=777") + # create 3K zero byte files + self._do_subvolume_io(subvolname, number_of_files=3000, file_size=0) + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + self.assertNotEqual(vol_info['pending_subvolume_deletions'], 0, + "pending_subvolume_deletions should be 1") + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_volume_info_without_subvolumegroup(self): + """ + Tests the 'fs volume info' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + def test_volume_info_with_human_readable_flag(self): + """ + Tests the 'fs volume info --human_readable' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["used_size"][-1] in units, "unit suffix in used_size is absent" + assert vol_info["pools"]["data"][0]["avail"][-1] in units, 
"unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertEqual(int(vol_info["used_size"]), 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_with_human_readable_flag_without_subvolumegroup(self): + """ + Tests the 'fs volume info --human_readable' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + +class TestSubvolumeGroups(TestVolumesHelper): + """Tests for FS subvolume group operations.""" + def test_default_uid_gid_subvolume_group(self): + group = self._generate_random_group_name() + expected_uid = 0 + expected_gid = 0 + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + group_path = self._get_subvolume_group_path(self.volname, group) + + # check group's uid and gid + stat = self.mount_a.stat(group_path) + self.assertEqual(stat['st_uid'], expected_uid) + self.assertEqual(stat['st_gid'], expected_gid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_nonexistent_subvolume_group_create(self): + subvolume = self._generate_random_subvolume_name() + group = "non_existent_group" + + # try, creating subvolume in a nonexistent group + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume create' command to fail") + + def test_nonexistent_subvolume_group_rm(self): + group = "non_existent_group" + + # try, remove subvolume group + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup rm' command to fail") + + def test_subvolume_group_create_with_auto_cleanup_on_fail(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + + # check whether group path is cleaned up + try: + self._fs_cmd("subvolumegroup", "getpath", self.volname, group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise 
RuntimeError("expected the 'fs subvolumegroup getpath' command to fail") + + def test_subvolume_group_create_with_desired_data_pool_layout(self): + group1, group2 = self._generate_random_group_name(2) + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group1) + group1_path = self._get_subvolume_group_path(self.volname, group1) + + default_pool = self.mount_a.getfattr(group1_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # create group specifying the new data pool as its pool layout + self._fs_cmd("subvolumegroup", "create", self.volname, group2, + "--pool_layout", new_pool) + group2_path = self._get_subvolume_group_path(self.volname, group2) + + desired_pool = self.mount_a.getfattr(group2_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + self._fs_cmd("subvolumegroup", "rm", self.volname, group1) + self._fs_cmd("subvolumegroup", "rm", self.volname, group2) + + def test_subvolume_group_create_with_desired_mode(self): + group1, group2 = self._generate_random_group_name(2) + # default mode + expected_mode1 = "755" + # desired mode + expected_mode2 = "777" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group2, f"--mode={expected_mode2}") + self._fs_cmd("subvolumegroup", "create", self.volname, group1) + + group1_path = self._get_subvolume_group_path(self.volname, group1) + group2_path = self._get_subvolume_group_path(self.volname, group2) + volumes_path = os.path.dirname(group1_path) + + # check group's mode + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group1_path]).stdout.getvalue().strip() + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', group2_path]).stdout.getvalue().strip() + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode1, expected_mode1) + self.assertEqual(actual_mode2, expected_mode2) + self.assertEqual(actual_mode3, expected_mode1) + + self._fs_cmd("subvolumegroup", "rm", self.volname, group1) + self._fs_cmd("subvolumegroup", "rm", self.volname, group2) + + def test_subvolume_group_create_with_desired_uid_gid(self): + """ + That the subvolume group can be created with the desired uid and gid and its uid and gid matches the + expected values. 
+ """ + uid = 1000 + gid = 1000 + + # create subvolume group + subvolgroupname = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, subvolgroupname, "--uid", str(uid), "--gid", str(gid)) + + # make sure it exists + subvolgrouppath = self._get_subvolume_group_path(self.volname, subvolgroupname) + self.assertNotEqual(subvolgrouppath, None) + + # verify the uid and gid + suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolgrouppath]).stdout.getvalue().strip()) + sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolgrouppath]).stdout.getvalue().strip()) + self.assertEqual(uid, suid) + self.assertEqual(gid, sgid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname) + + def test_subvolume_group_create_with_invalid_data_pool_layout(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + try: + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup create' command to fail") + + def test_subvolume_group_create_with_size(self): + # create group with size -- should set quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + self.assertEqual(group_info["bytes_quota"], 1000000000) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_info(self): + # tests the 'fs subvolumegroup info' command + + group_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "uid"] + + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + for md in group_md: + self.assertIn(md, group_info, "'{0}' key not present in metadata of group".format(md)) + + self.assertEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(group_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") + self.assertEqual(group_info["uid"], 0) + self.assertEqual(group_info["gid"], 0) + + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + + # get group metadata after quota set + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + for md in group_md: + self.assertIn(md, group_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertNotEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is set") + self.assertEqual(group_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name -- should be idempotent + self._fs_cmd("subvolumegroup", "create", 
self.volname, group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_mode(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name with mode -- should set mode + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--mode=766") + + group_path = self._get_subvolume_group_path(self.volname, group) + + # check subvolumegroup's mode + mode = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip() + self.assertEqual(mode, "766") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_uid_gid(self): + desired_uid = 1000 + desired_gid = 1000 + + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name with uid/gid -- should set uid/gid + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--uid", str(desired_uid), "--gid", str(desired_gid)) + + group_path = self._get_subvolume_group_path(self.volname, group) + + # verify the uid and gid + actual_uid = int(self.mount_a.run_shell(['stat', '-c' '%u', group_path]).stdout.getvalue().strip()) + actual_gid = int(self.mount_a.run_shell(['stat', '-c' '%g', group_path]).stdout.getvalue().strip()) + self.assertEqual(desired_uid, actual_uid) + self.assertEqual(desired_gid, actual_gid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_data_pool(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + group_path = self._get_subvolume_group_path(self.volname, group) + + default_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # try creating w/ same subvolume group name with new data pool -- should set pool + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", new_pool) + desired_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_resize(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume name with size -- should set quota + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + self.assertEqual(group_info["bytes_quota"], 1000000000) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_quota_mds_path_restriction_to_group_path(self): + """ + Tests subvolumegroup quota enforcement with mds path restriction set to group. + For quota to be enforced, read permission needs to be provided to the parent + of the directory on which quota is set. 
Please see the tracker comment [1] + [1] https://tracker.ceph.com/issues/55090#note-8 + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # Create auth_id + authid = "client.guest1" + user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", authid, + "mds", "allow rw path=/volumes", + "mgr", "allow rw", + "osd", "allow rw tag cephfs *=*", + "mon", "allow r", + "--format=json-pretty" + )) + + # Prepare guest_mount with new authid + guest_mount = self.mount_b + guest_mount.umount_wait() + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) + + # mount the subvolume + mount_path = os.path.join("/", subvolpath) + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # create 99 files of 1MB + guest_mount.run_shell_payload("mkdir -p dir1") + for i in range(99): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE) + try: + # write two files of 1MB file to exceed the quota + guest_mount.run_shell_payload("mkdir -p dir2") + for i in range(2): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # For quota to be enforced + time.sleep(60) + # create 400 files of 1MB to exceed quota + for i in range(400): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # Sometimes quota enforcement takes time. 
+ if i == 200: + time.sleep(60) + except CommandFailedError: + pass + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # clean up + guest_mount.umount_wait() + + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_mds_path_restriction_to_subvolume_path(self): + """ + Tests subvolumegroup quota enforcement with mds path restriction set to subvolume path + The quota should not be enforced because of the fourth limitation mentioned at + https://docs.ceph.com/en/latest/cephfs/quota/#limitations + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + mount_path = os.path.join("/", subvolpath) + + # Create auth_id + authid = "client.guest1" + user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", authid, + "mds", f"allow rw path={mount_path}", + "mgr", "allow rw", + "osd", "allow rw tag cephfs *=*", + "mon", "allow r", + "--format=json-pretty" + )) + + # Prepare guest_mount with new authid + guest_mount = self.mount_b + guest_mount.umount_wait() + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) + + # mount the subvolume + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # create 99 files of 1MB to exceed quota + guest_mount.run_shell_payload("mkdir -p dir1") + for i in range(99): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE) + try: + # write two files of 1MB file to exceed the quota + guest_mount.run_shell_payload("mkdir -p dir2") + for i in range(2): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # For quota to be enforced + time.sleep(60) + # create 400 files of 1MB to exceed quota + for i in range(400): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # Sometimes quota enforcement takes time. 
+ if i == 200: + time.sleep(60) + except CommandFailedError: + self.fail(f"Quota should not be enforced, expected filling subvolume {subvolname} with 400 files of size 1MB to succeed") + + # clean up + guest_mount.umount_wait() + + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_exceeded_subvolume_removal(self): + """ + Tests subvolume removal if its group quota is exceeded + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 400 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=400) + except CommandFailedError: + # Delete subvolume when group quota is exceeded + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_exceeded_subvolume_removal_retained_snaps(self): + """ + Tests retained snapshot subvolume removal if its group quota is exceeded + """ + group = self._generate_random_group_name() + subvolname = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot1, "--group_name", group) + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot2, "--group_name", group) + + try: + # write two files of 1MB file to 
exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 400 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=400) + except CommandFailedError: + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group, "--retain-snapshots") + # remove snapshot1 + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot1, "--group_name", group) + # remove snapshot2 (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot2, "--group_name", group) + # verify subvolume trash is clean + self._wait_for_subvol_trash_empty(subvolname, group=group) + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_subvolume_removal(self): + """ + Tests subvolume removal if its group quota is set. + """ + # create group with size -- should set quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_legacy_subvolume_removal(self): + """ + Tests legacy subvolume removal if its group quota is set. + """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume -- in a custom group + createpath1 = os.path.join(".", "volumes", group, subvolume) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False) + + # this would auto-upgrade on access without anyone noticing + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, "--group-name", group) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() # remove "/" prefix and any trailing newline + + # and... the subvolume path returned should be what we created behind the scenes + self.assertEqual(createpath1[1:], subvolpath1) + + # Set subvolumegroup quota on idempotent subvolumegroup creation + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_v1_subvolume_removal(self): + """ + Tests v1 subvolume removal if its group quota is set. 
+ """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in a custom group + self._create_v1_subvolume(subvolume, subvol_group=group, has_snapshot=False) + + # Set subvolumegroup quota on idempotent subvolumegroup creation + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_fail_invalid_size(self): + """ + That a subvolume group cannot be resized to an invalid size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group with 1MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # try to resize the subvolume with an invalid size -10 + nsize = -10 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on resize of subvolume group with invalid size") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_fail_zero_size(self): + """ + That a subvolume group cannot be resized to a zero size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group with 1MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # try to resize the subvolume group with size 0 + nsize = 0 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on resize of subvolume group with invalid size") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_quota_lt_used_size(self): + """ + That a subvolume group can be resized to a size smaller than the current used size + and the resulting quota matches the expected size. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create group with 20MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + + # shrink the subvolume group + nsize = usedsize // 2 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError: + self.fail("expected the 'fs subvolumegroup resize' command to succeed") + + # verify the quota + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_fail_quota_lt_used_size_no_shrink(self): + """ + That a subvolume group cannot be resized to a size smaller than the current used size + when --no_shrink is given and the quota did not change. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create group with 20MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(grouppath, "ceph.dir.rbytes")) + + # shrink the subvolume group + nsize = usedsize // 2 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize), "--no_shrink") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolumegroup with quota less than used") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_expand_on_full_subvolume(self): + """ + That the subvolume group can be expanded after it is full and future write succeed + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 500 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + # Not able to write. 
So expand the subvolumegroup more and try writing the files again + nsize = osize*7 + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + try: + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to succeed".format(subvolname)) + else: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to fail".format(subvolname)) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_infinite_size(self): + """ + That a subvolume group can be resized to an infinite size by unsetting its quota. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # resize inf + self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_infinite_size_future_writes(self): + """ + That a subvolume group can be resized to an infinite size and the future writes succeed. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*5 + # create group with 5MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 4 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=4) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 500 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + # Not able to write. 
So resize subvolumegroup to 'inf' and try writing the files again + # resize inf + self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf") + try: + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to succeed".format(subvolname)) + else: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to fail".format(subvolname)) + + + # verify that the quota is None + size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_ls(self): + # tests the 'fs subvolumegroup ls' command + + subvolumegroups = [] + + #create subvolumegroups + subvolumegroups = self._generate_random_group_name(3) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to list the created subvolume groups") + else: + subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls] + if collections.Counter(subvolgroupnames) != collections.Counter(subvolumegroups): + raise RuntimeError("Error creating or listing subvolume groups") + + def test_subvolume_group_ls_filter(self): + # tests the 'fs subvolumegroup ls' command filters '_deleting' directory + + subvolumegroups = [] + + #create subvolumegroup + subvolumegroups = self._generate_random_group_name(3) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + # create subvolume and remove. This creates '_deleting' directory. 
+ subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls] + if "_deleting" in subvolgroupnames: + self.fail("Listing subvolume groups listed '_deleting' directory") + + def test_subvolume_group_ls_filter_internal_directories(self): + # tests the 'fs subvolumegroup ls' command filters internal directories + # eg: '_deleting', '_nogroup', '_index', "_legacy" + + subvolumegroups = self._generate_random_group_name(3) + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + #create subvolumegroups + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + # create subvolume which will create '_nogroup' directory + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # create snapshot + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone snapshot which will create '_index' directory + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # wait for clone to complete + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume which will create '_deleting' directory + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # list subvolumegroups + ret = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + self.assertEqual(len(ret), len(subvolumegroups)) + + ret_list = [subvolumegroup['name'] for subvolumegroup in ret] + self.assertEqual(len(ret_list), len(subvolumegroups)) + + self.assertEqual(all(elem in subvolumegroups for elem in ret_list), True) + + # cleanup + self._fs_cmd("subvolume", "rm", self.volname, clone) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "rm", self.volname, groupname) + + def test_subvolume_group_ls_for_nonexistent_volume(self): + # tests the 'fs subvolumegroup ls' command when /volume doesn't exist + # prerequisite: we expect that the test volume is created and a subvolumegroup is NOT created + + # list subvolume groups + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) > 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to output an empty list") + + def test_subvolumegroup_pin_distributed(self): + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + self.config_set('mds', 'mds_export_ephemeral_distributed', True) + + group = "pinme" + self._fs_cmd("subvolumegroup", "create", self.volname, group) + self._fs_cmd("subvolumegroup", "pin", self.volname, group, "distributed", "True") + subvolumes = self._generate_random_subvolume_name(50) + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + self._wait_distributed_subtrees(2 * 2, status=status, rank="all") + + # remove subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_rm_force(self): + # test removing non-existing subvolume group with --force + group = 
self._generate_random_group_name() + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolumegroup rm --force' command to succeed") + + def test_subvolume_group_exists_with_subvolumegroup_and_no_subvolume(self): + """Test the presence of any subvolumegroup when only subvolumegroup is present""" + + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_with_no_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolumegroup when no subvolumegroup is present""" + + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_with_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolume when subvolumegroup + and subvolume both are present""" + + group = self._generate_random_group_name() + subvolume = self._generate_random_subvolume_name(2) + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume[0], "--group_name", group) + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume[1]) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolume in group + self._fs_cmd("subvolume", "rm", self.volname, subvolume[0], "--group_name", group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume[1]) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_without_subvolumegroup_and_with_subvolume(self): + """Test the presence of any subvolume when subvolume is present + but no subvolumegroup is present""" + + subvolume = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + +class TestSubvolumes(TestVolumesHelper): + """Tests for FS subvolume operations, except snapshot and snapshot clone.""" + def test_async_subvolume_rm(self): + subvolumes = self._generate_random_subvolume_name(100) + + # create subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + self._do_subvolume_io(subvolume, 
number_of_files=10) + + self.mount_a.umount_wait() + + # remove subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + self.mount_a.mount_wait() + + # verify trash dir is clean + self._wait_for_trash_empty(timeout=300) + + def test_default_uid_gid_subvolume(self): + subvolume = self._generate_random_subvolume_name() + expected_uid = 0 + expected_gid = 0 + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # check subvolume's uid and gid + stat = self.mount_a.stat(subvol_path) + self.assertEqual(stat['st_uid'], expected_uid) + self.assertEqual(stat['st_gid'], expected_gid) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_nonexistent_subvolume_rm(self): + # remove non-existing subvolume + subvolume = "non_existent_subvolume" + + # try, remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume rm' command to fail") + + def test_subvolume_create_and_rm(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # make sure it exists + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + self.assertNotEqual(subvolpath, None) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + # make sure its gone + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume getpath' command to fail. 
Subvolume not removed.") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_and_rm_in_group(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_create_idempotence(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # try creating w/ same subvolume name -- should be idempotent + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_resize(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # try creating w/ same subvolume name with size -- should set quota + self._fs_cmd("subvolume", "create", self.volname, subvolume, "1000000000") + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + self.assertEqual(subvol_info["bytes_quota"], 1000000000) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_mode(self): + # default mode + default_mode = "755" + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_1, default_mode) + + # try creating w/ same subvolume name with --mode 777 + new_mode = "777" + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", new_mode) + + actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_2, new_mode) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_without_passing_mode(self): + # create subvolume + desired_mode = "777" + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", desired_mode) + + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_1, desired_mode) + + # default mode + default_mode = "755" + + # try creating w/ same subvolume name without passing --mode argument + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_2, default_mode) + + # remove subvolume + self._fs_cmd("subvolume", "rm", 
self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_isolated_namespace(self): + """ + Create subvolume in separate rados namespace + """ + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated") + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + self.assertNotEqual(len(subvol_info), 0) + self.assertEqual(subvol_info["pool_namespace"], "fsvolumens_" + subvolume) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_auto_cleanup_on_fail(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout fails + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + + # check whether subvol path is cleaned up + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of non-existent subvolume") + else: + self.fail("expected the 'fs subvolume getpath' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_data_pool_layout_in_group(self): + subvol1, subvol2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # create group. this also helps set default pool layout for subvolumes + # created within the group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group) + + default_pool = self.mount_a.getfattr(subvol1_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # create subvolume specifying the new data pool as its pool layout + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, + "--pool_layout", new_pool) + subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group) + + desired_pool = self.mount_a.getfattr(subvol2_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_mode(self): + subvol1 = self._generate_random_subvolume_name() + + # default mode + default_mode = "755" + # desired mode + desired_mode = "777" + + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--mode", "777") + + subvol1_path = self._get_subvolume_path(self.volname, subvol1) + + # check subvolumegroup's mode + subvol_par_path = os.path.dirname(subvol1_path) + group_path = os.path.dirname(subvol_par_path) + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode1, default_mode) + # check /volumes mode + volumes_path = os.path.dirname(group_path) + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode2, default_mode) + # check subvolume's mode + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode3, desired_mode) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_mode_in_group(self): + subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) + + group = self._generate_random_group_name() + # default mode + expected_mode1 = "755" + # desired mode + expected_mode2 = "777" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, "--mode", "777") + # check whether mode 0777 also works + self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group, "--mode", "0777") + + subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group) + subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group) + subvol3_path = self._get_subvolume_path(self.volname, subvol3, group_name=group) + + # check subvolume's mode + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip() + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol2_path]).stdout.getvalue().strip() + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol3_path]).stdout.getvalue().strip() + 
self.assertEqual(actual_mode1, expected_mode1) + self.assertEqual(actual_mode2, expected_mode2) + self.assertEqual(actual_mode3, expected_mode2) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol3, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_uid_gid(self): + """ + That the subvolume can be created with the desired uid and gid and its uid and gid match the + expected values. + """ + uid = 1000 + gid = 1000 + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--uid", str(uid), "--gid", str(gid)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # verify the uid and gid + suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolpath]).stdout.getvalue().strip()) + sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolpath]).stdout.getvalue().strip()) + self.assertEqual(uid, suid) + self.assertEqual(gid, sgid) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_invalid_data_pool_layout(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid pool layout") + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_invalid_size(self): + # create subvolume with an invalid size -1 + subvolume = self._generate_random_subvolume_name() + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_and_ls_providing_group_as_nogroup(self): + """ + That 'subvolume create' and 'subvolume ls' should throw a + permission denied error if the option --group_name=_nogroup is provided. 
+ """ + + subvolname = self._generate_random_subvolume_name() + + # try to create subvolume providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolname) + + # try to list subvolumes providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume ls' command to fail") + + # list subvolumes + self._fs_cmd("subvolume", "ls", self.volname) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_expand(self): + """ + That a subvolume can be expanded in size and its quota matches the expected size. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # expand the subvolume + nsize = osize*2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_info(self): + # tests the 'fs subvolume info' command + + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid", "features", "state"] + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") + self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") + + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + + # get subvolume metadata after quota set + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, 
subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") + + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_ls(self): + # tests the 'fs subvolume ls' command + + subvolumes = [] + + # create subvolumes + subvolumes = self._generate_random_subvolume_name(3) + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) == 0: + self.fail("Expected the 'fs subvolume ls' command to list the created subvolumes.") + else: + subvolnames = [subvolume['name'] for subvolume in subvolumels] + if collections.Counter(subvolnames) != collections.Counter(subvolumes): + self.fail("Error creating or listing subvolumes") + + # remove subvolume + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_ls_with_groupname_as_internal_directory(self): + # tests the 'fs subvolume ls' command when the default groupname as internal directories + # Eg: '_nogroup', '_legacy', '_deleting', '_index'. 
+ # Expecting 'fs subvolume ls' will be fail with errno EINVAL for '_legacy', '_deleting', '_index' + # Expecting 'fs subvolume ls' will be fail with errno EPERM for '_nogroup' + + # try to list subvolumes providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EPERM' for _nogroup") + + # try to list subvolumes providing --group_name=_legacy option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_legacy") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _legacy") + + # try to list subvolumes providing --group_name=_deleting option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_deleting") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _deleting") + + # try to list subvolumes providing --group_name=_index option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_index") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _index") + + def test_subvolume_ls_for_notexistent_default_group(self): + # tests the 'fs subvolume ls' command when the default group '_nogroup' doesn't exist + # prerequisite: we expect that the volume is created and the default group _nogroup is + # NOT created (i.e. 
a subvolume without group is not created) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) > 0: + raise RuntimeError("Expected the 'fs subvolume ls' command to output an empty list.") + + def test_subvolume_marked(self): + """ + ensure a subvolume is marked with the ceph.dir.subvolume xattr + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + + # subdirectory of a subvolume cannot be moved outside the subvolume once marked with + # the xattr ceph.dir.subvolume, hence test by attempting to rename subvol path (incarnation) + # outside the subvolume + dstpath = os.path.join(self.mount_a.mountpoint, 'volumes', '_nogroup', 'new_subvol_location') + srcpath = os.path.join(self.mount_a.mountpoint, subvolpath) + rename_script = dedent(""" + import os + import errno + try: + os.rename("{src}", "{dst}") + except OSError as e: + if e.errno != errno.EXDEV: + raise RuntimeError("invalid error code on renaming subvolume incarnation out of subvolume directory") + else: + raise RuntimeError("expected renaming subvolume incarnation out of subvolume directory to fail") + """) + self.mount_a.run_python(rename_script.format(src=srcpath, dst=dstpath), sudo=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_pin_export(self): + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "pin", self.volname, subvolume, "export", "1") + path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + path = os.path.dirname(path) # get subvolume path + + self._get_subtrees(status=status, rank=1) + self._wait_subtrees([(path, 1)], status=status) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + ### authorize operations + + def test_authorize_deauthorize_legacy_subvolume(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid = "alice" + + guest_mount = self.mount_b + guest_mount.umount_wait() + + # emulate a old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolume) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + mount_path = os.path.join("/", "volumes", group, subvolume) + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id") + + # guest authID should exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertIn("client.{0}".format(authid), existing_ids) + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, authid, key) + + # mount the subvolume, and write to it + guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.write_n_mb("data.bin", 1) + + # authorize guest authID read access to subvolume 
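+ # Note: re-running 'subvolume authorize' for the same auth ID with '--access_level', 'r'
+ # is expected to replace the earlier read-write grant with a read-only one; as the checks
+ # below show, the guest only observes the downgrade after remounting the subvolume.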
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r") + + # guest client sees the change in access level to read only after a + # remount of the subvolume. + guest_mount.umount_wait() + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # read existing content of the subvolume + self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) + # cannot write into read-only subvolume + with self.assertRaises(CommandFailedError): + guest_mount.write_n_mb("rogue.bin", 1) + + # cleanup + guest_mount.umount_wait() + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid, + "--group_name", group) + # guest authID should no longer exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertNotIn("client.{0}".format(authid), existing_ids) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_deauthorize_subvolume(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid = "alice" + + guest_mount = self.mount_b + guest_mount.umount_wait() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--mode=777") + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, + "--group_name", group).rstrip() + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id") + + # guest authID should exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertIn("client.{0}".format(authid), existing_ids) + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, authid, key) + + # mount the subvolume, and write to it + guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.write_n_mb("data.bin", 1) + + # authorize guest authID read access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r") + + # guest client sees the change in access level to read only after a + # remount of the subvolume. + guest_mount.umount_wait() + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # read existing content of the subvolume + self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) + # cannot write into read-only subvolume + with self.assertRaises(CommandFailedError): + guest_mount.write_n_mb("rogue.bin", 1) + + # cleanup + guest_mount.umount_wait() + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid, + "--group_name", group) + # guest authID should no longer exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertNotIn("client.{0}".format(authid), existing_ids) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_multitenant_subvolumes(self): + """ + That subvolume access can be restricted to a tenant. + + That metadata used to enforce tenant isolation of + subvolumes is stored as a two-way mapping between auth + IDs and subvolumes that they're authorized to access. 
+ """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + guest_mount = self.mount_b + + # Guest clients belonging to different tenants, but using the same + # auth ID. + auth_id = "alice" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + guestclient_2 = { + "auth_id": auth_id, + "tenant_id": "tenant2", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Check that subvolume metadata file is created on subvolume creation. + subvol_metadata_filename = "_{0}:{1}.meta".format(group, subvolume) + self.assertIn(subvol_metadata_filename, guest_mount.ls("volumes")) + + # Authorize 'guestclient_1', using auth ID 'alice' and belonging to + # 'tenant1', with 'rw' access to the volume. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'alice', is + # created on authorizing 'alice' access to the subvolume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Verify that the auth metadata file stores the tenant ID that the + # auth ID belongs to, the auth ID's authorized access levels + # for different subvolumes, versioning details, etc. + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # Verify that the subvolume metadata file stores info about auth IDs + # and their access levels to the subvolume, versioning details, etc. + expected_subvol_metadata = { + "version": 1, + "compat_version": 1, + "auths": { + "alice": { + "dirty": False, + "access_level": "rw" + } + } + } + subvol_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(subvol_metadata_filename))) + + self.assertGreaterEqual(subvol_metadata["version"], expected_subvol_metadata["version"]) + del expected_subvol_metadata["version"] + del subvol_metadata["version"] + self.assertEqual(expected_subvol_metadata, subvol_metadata) + + # Cannot authorize 'guestclient_2' to access the volume. + # It uses auth ID 'alice', which has already been used by a + # 'guestclient_1' belonging to an another tenant for accessing + # the volume. + + try: + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_2["auth_id"], + "--group_name", group, "--tenant_id", guestclient_2["tenant_id"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "Invalid error code returned on authorize of subvolume with same auth_id but different tenant_id") + else: + self.fail("expected the 'fs subvolume authorize' command to fail") + + # Check that auth metadata file is cleaned up on removing + # auth ID's only access to a volume. 
+ + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, + "--group_name", group) + self.assertNotIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Check that subvolume metadata file is cleaned up on subvolume deletion. + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self.assertNotIn(subvol_metadata_filename, guest_mount.ls("volumes")) + + # clean up + guest_mount.umount_wait() + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_authorized_list(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid1 = "alice" + authid2 = "guest1" + authid3 = "guest2" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # authorize alice authID read-write access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid1, + "--group_name", group) + # authorize guest1 authID read-write access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid2, + "--group_name", group) + # authorize guest2 authID read access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid3, + "--group_name", group, "--access_level", "r") + + # list authorized-ids of the subvolume + expected_auth_list = [{'alice': 'rw'}, {'guest1': 'rw'}, {'guest2': 'r'}] + auth_list = json.loads(self._fs_cmd('subvolume', 'authorized_list', self.volname, subvolume, "--group_name", group)) + self.assertCountEqual(expected_auth_list, auth_list) + + # cleanup + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid1, + "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid2, + "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid3, + "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_auth_id_not_created_by_mgr_volumes(self): + """ + If the auth_id already exists and is not created by mgr plugin, + it's not allowed to authorize the auth-id by default. 
+ """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + try: + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "Invalid error code returned on authorize of subvolume for auth_id created out of band") + else: + self.fail("expected the 'fs subvolume authorize' command to fail") + + # clean up + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_allow_existing_id_option(self): + """ + If the auth_id already exists and is not created by mgr volumes, + it's not allowed to authorize the auth-id by default but is + allowed with option allow_existing_id. + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Cannot authorize 'guestclient_1' to access the volume by default, + # which already exists and not created by mgr volumes but is allowed + # with option 'allow_existing_id'. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"], "--allow-existing-id") + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, + "--group_name", group) + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_deauthorize_auth_id_after_out_of_band_update(self): + """ + If the auth_id authorized by mgr/volumes plugin is updated + out of band, the auth_id should not be deleted after a + deauthorize. It should only remove caps associated with it. + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume. 
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + subvol_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, + "--group_name", group).rstrip() + + # Update caps for guestclient_1 out of band + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "caps", "client.guest1", + "mds", "allow rw path=/volumes/{0}, allow rw path={1}".format(group, subvol_path), + "osd", "allow rw pool=cephfs_data", + "mon", "allow r", + "mgr", "allow *" + ) + + # Deauthorize guestclient_1 + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) + + # Validate the caps of guestclient_1 after deauthorize. It should not have deleted + # guestclient_1. The mgr and mds caps should be present which was updated out of band. + out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty")) + + self.assertEqual("client.guest1", out[0]["entity"]) + self.assertEqual("allow rw path=/volumes/{0}".format(group), out[0]["caps"]["mds"]) + self.assertEqual("allow *", out[0]["caps"]["mgr"]) + self.assertNotIn("osd", out[0]["caps"]) + + # clean up + out = self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_recover_auth_metadata_during_authorize(self): + """ + That auth metadata manager can recover from partial auth updates using + metadata files, which store auth info and its update status info. This + test validates the recovery during authorize. + """ + + guest_mount = self.mount_b + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + + # Induce partial auth update state by modifying the auth metadata file, + # and then run authorize again. + guest_mount.run_shell(['sudo', 'sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Authorize 'guestclient_1' to access the subvolume. 
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertEqual(auth_metadata_content, expected_auth_metadata_content) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_recover_auth_metadata_during_deauthorize(self): + """ + That auth metadata manager can recover from partial auth updates using + metadata files, which store auth info and its update status info. This + test validates the recovery during deauthorize. + """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + guestclient_1 = { + "auth_id": "guest1", + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume1. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + + # Authorize 'guestclient_1' to access the subvolume2. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Induce partial auth update state by modifying the auth metadata file, + # and then run de-authorize. + guest_mount.run_shell(['sudo', 'sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Deauthorize 'guestclient_1' to access the subvolume2. 
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group) + + auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertEqual(auth_metadata_content, expected_auth_metadata_content) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, "guest1", "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_update_old_style_auth_metadata_to_new_during_authorize(self): + """ + CephVolumeClient stores the subvolume data in auth metadata file with + 'volumes' key as there was no subvolume namespace. It doesn't makes sense + with mgr/volumes. This test validates the transparent update of 'volumes' + key to 'subvolumes' key in auth metadata file during authorize. + """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume1. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + guest_mount.run_shell(['sudo', 'sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Authorize 'guestclient_1' to access the subvolume2. 
This should transparently update 'volumes' to 'subvolumes' + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume1): { + "dirty": False, + "access_level": "rw" + }, + "{0}/{1}".format(group,subvolume2): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_update_old_style_auth_metadata_to_new_during_deauthorize(self): + """ + CephVolumeClient stores the subvolume data in auth metadata file with + 'volumes' key as there was no subvolume namespace. It doesn't makes sense + with mgr/volumes. This test validates the transparent update of 'volumes' + key to 'subvolumes' key in auth metadata file during deauthorize. + """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Authorize 'guestclient_1' to access the subvolume2. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is created. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + guest_mount.run_shell(['sudo', 'sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Deauthorize 'guestclient_1' to access the subvolume2. 
This should update 'volumes' to subvolumes' + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group) + + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume1): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_evict_client(self): + """ + That a subvolume client can be evicted based on the auth ID + """ + + subvolumes = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # mounts[0] and mounts[1] would be used as guests to mount the volumes/shares. + for i in range(0, 2): + self.mounts[i].umount_wait() + guest_mounts = (self.mounts[0], self.mounts[1]) + auth_id = "guest" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create two subvolumes. Authorize 'guest' auth ID to mount the two + # subvolumes. Mount the two subvolumes. Write data to the volumes. + for i in range(2): + # Create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolumes[i], "--group_name", group, "--mode=777") + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolumes[i], guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolumes[i], + "--group_name", group).rstrip() + # configure credentials for guest client + self._configure_guest_auth(guest_mounts[i], auth_id, key) + + # mount the subvolume, and write to it + guest_mounts[i].mount_wait(cephfs_mntpt=mount_path) + guest_mounts[i].write_n_mb("data.bin", 1) + + # Evict client, guest_mounts[0], using auth ID 'guest' and has mounted + # one volume. + self._fs_cmd("subvolume", "evict", self.volname, subvolumes[0], auth_id, "--group_name", group) + + # Evicted guest client, guest_mounts[0], should not be able to do + # anymore metadata ops. It should start failing all operations + # when it sees that its own address is in the blocklist. + try: + guest_mounts[0].write_n_mb("rogue.bin", 1) + except CommandFailedError: + pass + else: + raise RuntimeError("post-eviction write should have failed!") + + # The blocklisted guest client should now be unmountable + guest_mounts[0].umount_wait() + + # Guest client, guest_mounts[1], using the same auth ID 'guest', but + # has mounted the other volume, should be able to use its volume + # unaffected. + guest_mounts[1].write_n_mb("data.bin.1", 1) + + # Cleanup. 
+ guest_mounts[1].umount_wait() + for i in range(2): + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolumes[i], auth_id, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolumes[i], "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_pin_random(self): + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + self.config_set('mds', 'mds_export_ephemeral_random', True) + + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "pin", self.volname, subvolume, "random", ".01") + # no verification + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_invalid_size(self): + """ + That a subvolume cannot be resized to an invalid size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # try to resize the subvolume with an invalid size -10 + nsize = -10 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_zero_size(self): + """ + That a subvolume cannot be resized to a zero size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # try to resize the subvolume with size 0 + nsize = 0 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_quota_lt_used_size(self): + """ + That a subvolume can be resized to a size smaller than the current used size + and the resulting quota matches the expected size. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip()) + if isinstance(self.mount_a, FuseMount): + # kclient dir does not have size==rbytes + self.assertEqual(usedsize, susedsize) + + # shrink the subvolume + nsize = usedsize // 2 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError: + self.fail("expected the 'fs subvolume resize' command to succeed") + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_quota_lt_used_size_no_shrink(self): + """ + That a subvolume cannot be resized to a size smaller than the current used size + when --no_shrink is given and the quota did not change. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip()) + if isinstance(self.mount_a, FuseMount): + # kclient dir does not have size==rbytes + self.assertEqual(usedsize, susedsize) + + # shrink the subvolume + nsize = usedsize // 2 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize), "--no_shrink") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_expand_on_full_subvolume(self): + """ + That the subvolume can be expanded 
from a full subvolume and future writes succeed. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*10 + # create subvolume of quota 10MB and make sure it exists + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of size 10MB and write + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+3) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + # create a file of size 5MB and try write more + file_size=file_size // 2 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+4) + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + # Not able to write. So expand the subvolume more and try writing the 5MB file again + nsize = osize*2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" + "to succeed".format(subvolname, number_of_files, file_size)) + else: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" + "to fail".format(subvolname, number_of_files, file_size)) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_infinite_size(self): + """ + That a subvolume can be resized to an infinite size by unsetting its quota. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", + str(self.DEFAULT_FILE_SIZE*1024*1024)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # resize inf + self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_infinite_size_future_writes(self): + """ + That a subvolume can be resized to an infinite size and the future writes succeed. 
+ """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", + str(self.DEFAULT_FILE_SIZE*1024*1024*5), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # resize inf + self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # create one file of 10MB and try to write + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+5) + + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB " + "to succeed".format(subvolname, number_of_files, file_size)) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_rm_force(self): + # test removing non-existing subvolume with --force + subvolume = self._generate_random_subvolume_name() + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume rm --force' command to succeed") + + def test_subvolume_exists_with_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolume by specifying the name of subvolumegroup""" + + group = self._generate_random_group_name() + subvolume1 = self._generate_random_subvolume_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "subvolume exists") + # delete subvolume in group + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_exists_with_subvolumegroup_and_no_subvolume(self): + """Test the presence of any subvolume specifying the name + of subvolumegroup and no subvolumes""" + + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_exists_without_subvolumegroup_and_with_subvolume(self): + """Test the presence of any subvolume without specifying the name + of subvolumegroup""" + + subvolume1 = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume1) + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolume exists") + # delete subvolume + self._fs_cmd("subvolume", 
"rm", self.volname, subvolume1) + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + + def test_subvolume_exists_without_subvolumegroup_and_without_subvolume(self): + """Test the presence of any subvolume without any subvolumegroup + and without any subvolume""" + + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + + def test_subvolume_shrink(self): + """ + That a subvolume can be shrinked in size and its quota matches the expected size. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # shrink the subvolume + nsize = osize // 2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_rm_idempotency(self): + """ + ensure subvolume deletion of a subvolume which is already deleted with retain snapshots option passes. + After subvolume deletion with retain snapshots, the subvolume exists until the trash directory (resides inside subvolume) + is cleaned up. The subvolume deletion issued while the trash directory is not empty, should pass and should + not error out with EAGAIN. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=256) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # remove snapshots (removes retained volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume (check idempotency) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + self.fail(f"expected subvolume rm to pass with error: {os.strerror(ce.exitstatus)}") + + # verify trash dir is clean + self._wait_for_trash_empty() + + + def test_subvolume_user_metadata_set(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_set_idempotence(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + # set same metadata again for subvolume. + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed because it is idempotent operation") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # try to get value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_nonexist", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get_for_nonexisting_section(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # try to get value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_update(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # update metadata against key. + new_value = "new_value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, new_value, "--group_name", group) + + # get metadata for specified key of subvolume. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(new_value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + ret_dict = json.loads(ret) + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list_if_no_metadata_set(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # compare output with expected output + # expecting empty json/dictionary + self.assertEqual(ret, "{}") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # try to remove value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_nonexist", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_for_nonexisting_section(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # try to remove value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_force(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key with --force option. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_force_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + # again remove metadata against already removed key with --force option. 
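+ # (added note: with --force, removing a key that no longer exists is expected to exit cleanly rather than fail with ENOENT, which is what the try/except below verifies)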
+ try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' (with --force) command to succeed") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_set_and_get_for_legacy_subvolume(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolname) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list_and_remove_for_legacy_subvolume(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolname) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + ret_dict = json.loads(ret) + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_1", "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_1", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key_1 does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + +class TestSubvolumeGroupSnapshots(TestVolumesHelper): + """Tests for FS subvolume group snapshot operations.""" + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_nonexistent_subvolume_group_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove snapshot + try: + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup snapshot rm' command to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_create_and_rm(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_idempotence(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # try creating snapshot w/ same snapshot name -- should be idempotent + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_ls(self): + # tests the 'fs subvolumegroup snapshot ls' command + + snapshots = [] + + # create group + group = 
self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumegroup snapshots + snapshots = self._generate_random_snapshot_name(3) + for snapshot in snapshots: + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + subvolgrpsnapshotls = json.loads(self._fs_cmd('subvolumegroup', 'snapshot', 'ls', self.volname, group)) + if len(subvolgrpsnapshotls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup snapshot ls' command to list the created subvolume group snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolgrpsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + raise RuntimeError("Error creating or listing subvolume group snapshots") + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_rm_force(self): + # test removing non-existing subvolume group snapshot with --force + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + # remove snapshot + try: + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed") + + def test_subvolume_group_snapshot_unsupported_status(self): + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # snapshot group + try: + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOSYS, "invalid error code on subvolumegroup snapshot create") + else: + self.fail("expected subvolumegroup snapshot create command to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + +class TestSubvolumeSnapshots(TestVolumesHelper): + """Tests for FS subvolume snapshot operations.""" + def test_nonexistent_subvolume_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove snapshot again + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume snapshot rm' command to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_create_and_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + 
self._wait_for_trash_empty() + + def test_subvolume_snapshot_create_idempotence(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # try creating w/ same subvolume snapshot name -- should be idempotent + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info(self): + + """ + tests the 'fs subvolume snapshot info' command + """ + + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot, snap_missing = self._generate_random_snapshot_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=1) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snapshot info for non-existent snapshot + try: + self._get_subvolume_snapshot_info(self.volname, subvolume, snap_missing) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot info of non-existent snapshot") + else: + self.fail("expected snapshot info of non-existent snapshot to fail") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_in_group(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot subvolume in group + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_snapshot_ls(self): + # tests the 'fs subvolume snapshot ls' command + + snapshots = [] + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # create subvolume snapshots + snapshots = self._generate_random_snapshot_name(3) + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + 
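+ # Added illustrative note (not asserted by the test): the 'snapshot ls' output parsed below is a JSON list of objects, each carrying at least a 'name' key, e.g. [{"name": "snap_0"}, {"name": "snap_1"}]; only the names are compared against the created set.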
subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + if len(subvolsnapshotls) == 0: + self.fail("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + self.fail("Error creating or listing subvolume snapshots") + + # remove snapshot + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_inherited_snapshot_ls(self): + # tests the scenario where 'fs subvolume snapshot ls' command + # should not list inherited snapshots created as part of snapshot + # at ancestral level + + snapshots = [] + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snap_count = 3 + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # create subvolume snapshots + snapshots = self._generate_random_snapshot_name(snap_count) + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # Create snapshot at ancestral level + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_1") + ancestral_snappath2 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_2") + self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1, ancestral_snappath2], omit_sudo=False) + + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume, group)) + self.assertEqual(len(subvolsnapshotls), snap_count) + + # remove ancestral snapshots + self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1, ancestral_snappath2], omit_sudo=False) + + # remove snapshot + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_inherited_snapshot_info(self): + """ + tests the scenario where 'fs subvolume snapshot info' command + should fail for inherited snapshots created as part of snapshot + at ancestral level + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create snapshot at ancestral level + ancestral_snap_name = "ancestral_snap_1" + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1], omit_sudo=False) + + # Validate existence of inherited snapshot + group_path = os.path.join(".", "volumes", group) + inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip()) + inherited_snap = "_{0}_{1}".format(ancestral_snap_name, 
inode_number_group_dir) + inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap) + self.mount_a.run_shell(['ls', inherited_snappath]) + + # snapshot info on inherited snapshot + try: + self._get_subvolume_snapshot_info(self.volname, subvolume, inherited_snap, group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on snapshot info of inherited snapshot") + else: + self.fail("expected snapshot info of inherited snapshot to fail") + + # remove ancestral snapshots + self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1], omit_sudo=False) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_inherited_snapshot_rm(self): + """ + tests the scenario where 'fs subvolume snapshot rm' command + should fail for inherited snapshots created as part of snapshot + at ancestral level + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create snapshot at ancestral level + ancestral_snap_name = "ancestral_snap_1" + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1], omit_sudo=False) + + # Validate existence of inherited snap + group_path = os.path.join(".", "volumes", group) + inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip()) + inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir) + inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap) + self.mount_a.run_shell(['ls', inherited_snappath]) + + # inherited snapshot should not be deletable + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, inherited_snap, "--group_name", group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when removing inherited snapshot") + else: + self.fail("expected removing inherited snapshot to fail") + + # remove ancestral snapshots + self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1], omit_sudo=False) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_subvolumegroup_snapshot_name_conflict(self): + """ + tests the scenario where a subvolume snapshot is created + with the same name as its subvolumegroup snapshot. This should + fail. 
+ """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + group_snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create subvolumegroup snapshot + group_snapshot_path = os.path.join(".", "volumes", group, ".snap", group_snapshot) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', group_snapshot_path], omit_sudo=False) + + # Validate existence of subvolumegroup snapshot + self.mount_a.run_shell(['ls', group_snapshot_path]) + + # Creation of subvolume snapshot with it's subvolumegroup snapshot name should fail + try: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, group_snapshot, "--group_name", group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when creating subvolume snapshot with same name as subvolume group snapshot") + else: + self.fail("expected subvolume snapshot creation with same name as subvolumegroup snapshot to fail") + + # remove subvolumegroup snapshot + self.mount_a.run_shell(['sudo', 'rmdir', group_snapshot_path], omit_sudo=False) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_retain_snapshot_invalid_recreate(self): + """ + ensure retained subvolume recreate does not leave any incarnations in the subvolume and trash + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume with an invalid pool + data_pool = "invalid_pool" + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on recreate of subvolume with invalid poolname") + else: + self.fail("expected recreate of subvolume with invalid poolname to fail") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_recreate_subvolume(self): + """ + ensure a retained subvolume can be recreated and further snapshotted + """ + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = 
self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}'".format(subvol_info["state"])) + + # recreate retained subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'complete', found '{0}'".format(subvol_info["state"])) + + # snapshot info (older snapshot) + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot1)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-create (new snapshot) + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with retain snapshots + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # list snapshots + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + self.assertEqual(len(subvolsnapshotls), 2, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] + for snap in [snapshot1, snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + # remove snapshots (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_with_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with snapshots retains the subvolume + also test allowed and disallowed operations on a retained subvolume + """ + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of retained subvolume with snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + 
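+ # Added sketch, illustrative only: assuming _fs_cmd forwards to the 'ceph fs' CLI, the retained-snapshot flow exercised by this test corresponds roughly to:
+ #   ceph fs subvolume snapshot create <vol> <subvol> <snap>
+ #   ceph fs subvolume rm <vol> <subvol> --retain-snapshots
+ #   ceph fs subvolume info <vol> <subvol>               # reports "state": "snapshot-retained"
+ #   ceph fs subvolume snapshot rm <vol> <subvol> <snap> # removing the last snapshot purges the retained subvolume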
# fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}'".format(subvol_info["state"])) + + ## test allowed ops in retained state + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) + + # snapshot info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # rm --force (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # rm (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + ## test disallowed ops + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # resize + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on resize of subvolume with retained snapshots") + else: + self.fail("expected resize of subvolume with retained snapshots to fail") + + # snap-create + try: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, "fail") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot create of subvolume with retained snapshots") + else: + self.fail("expected snapshot create of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_without_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with no snapshots deletes the subvolume + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove with snapshot retention (should remove volume, no snapshots to retain) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # verify 
list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate(self): + """ + ensure retained subvolume recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(subvolume) + + # recreate subvolume + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of subvolume with purge pending") + else: + self.fail("expected recreate of subvolume with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(subvolume, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_rm_with_snapshots(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOTEMPTY: + raise RuntimeError("invalid error code returned when deleting subvolume with snapshots") + else: + raise RuntimeError("expected subvolume deletion to fail") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_protect_unprotect_sanity(self): + """ + Snapshot protect/unprotect commands are deprecated. This test exists to ensure that + invoking the command does not cause errors, till they are removed from a subsequent release. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # now, protect snapshot + self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # now, unprotect snapshot + self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_rm_force(self): + # test removing non existing subvolume snapshot with --force + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # remove snapshot + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolume snapshot rm --force' command to succeed") + + def test_subvolume_snapshot_metadata_set(self): + """ + Set custom metadata for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_set_idempotence(self): + """ + Set custom metadata for subvolume snapshot (Idempotency). + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. 
+ key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed") + + # set same metadata again for subvolume. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed because it is idempotent operation") + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get(self): + """ + Get custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get_for_nonexisting_key(self): + """ + Get custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. 
+ key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # try to get value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key_nonexist", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get_for_nonexisting_section(self): + """ + Get custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # try to get value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_update(self): + """ + Update custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # update metadata against key. + new_value = "new_value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, new_value, group) + + # get metadata for specified key of snapshot. + try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. 
+ self.assertEqual(new_value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_list(self): + """ + List custom metadata for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, k, v, group) + + # list metadata + try: + ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group)) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed") + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_list_if_no_metadata_set(self): + """ + List custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # list metadata + try: + ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group)) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed") + + # compare output with expected output + empty_dict = {} + self.assertDictEqual(ret_dict, empty_dict) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove(self): + """ + Remove custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, key, snapshot, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_for_nonexisting_key(self): + """ + Remove custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # try to remove value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key_nonexist", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_for_nonexisting_section(self): + """ + Remove custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # try to remove value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_force(self): + """ + Forcefully remove custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key with --force option. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_force_for_nonexisting_key(self): + """ + Forcefully remove custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key. 
+ try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + # again remove metadata against already removed key with --force option. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' (with --force) command to succeed") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_after_snapshot_remove(self): + """ + Verify metadata removal of subvolume snapshot after snapshot removal. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # get value for specified key. + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + # remove subvolume snapshot. + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + + # try to get metadata after removing snapshot. + # Expecting error ENOENT with error message of snapshot does not exist + cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd( + args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group], + check_status=False, stdout=StringIO(), stderr=StringIO()) + self.assertEqual(cmd_ret.returncode, errno.ENOENT, "Expecting ENOENT error") + self.assertIn(f"snapshot '{snapshot}' does not exist", cmd_ret.stderr.getvalue(), + f"Expecting message: snapshot '{snapshot}' does not exist ") + + # confirm metadata is removed by searching section name in .meta file + meta_path = os.path.join(".", "volumes", group, subvolname, ".meta") + section_name = "SNAP_METADATA_" + snapshot + + try: + self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False) + except CommandFailedError as e: + self.assertNotEqual(e.exitstatus, 0) + else: + self.fail("Expected non-zero exist status because section should not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+        self._wait_for_trash_empty()
+
+    def test_clean_stale_subvolume_snapshot_metadata(self):
+        """
+        Validate cleaning of stale subvolume snapshot metadata.
+        """
+        subvolname = self._generate_random_subvolume_name()
+        group = self._generate_random_group_name()
+        snapshot = self._generate_random_snapshot_name()
+
+        # create group.
+        self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+        # create subvolume in group.
+        self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+        # snapshot subvolume
+        self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+        # set metadata for snapshot.
+        key = "key"
+        value = "value"
+        try:
+            self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+        except CommandFailedError:
+            self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed")
+
+        # save the subvolume config file.
+        meta_path = os.path.join(".", "volumes", group, subvolname, ".meta")
+        tmp_meta_path = os.path.join(".", "volumes", group, subvolname, ".meta.stale_snap_section")
+        self.mount_a.run_shell(['sudo', 'cp', '-p', meta_path, tmp_meta_path], omit_sudo=False)
+
+        # Delete snapshot, this would remove user snap metadata
+        self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+
+        # Copy back saved subvolume config file. This would have stale snapshot metadata
+        self.mount_a.run_shell(['sudo', 'cp', '-p', tmp_meta_path, meta_path], omit_sudo=False)
+
+        # Verify that it has stale snapshot metadata
+        section_name = "SNAP_METADATA_" + snapshot
+        try:
+            self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False)
+        except CommandFailedError:
+            self.fail("Expected grep cmd to succeed because stale snapshot metadata exists")
+
+        # Do any subvolume operation to clean the stale snapshot metadata
+        _ = json.loads(self._get_subvolume_info(self.volname, subvolname, group))
+
+        # Verify that the stale snapshot metadata is cleaned
+        try:
+            self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False)
+        except CommandFailedError as e:
+            self.assertNotEqual(e.exitstatus, 0)
+        else:
+            self.fail("Expected non-zero exit status because stale snapshot metadata should not exist")
+
+        self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+        self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+        # verify trash dir is clean.
+ self._wait_for_trash_empty() + # Clean tmp config file + self.mount_a.run_shell(['sudo', 'rm', '-f', tmp_meta_path], omit_sudo=False) + + +class TestSubvolumeSnapshotClones(TestVolumesHelper): + """ Tests for FS subvolume snapshot clone operations.""" + def test_clone_subvolume_info(self): + # tests the 'fs subvolume info' command for a clone + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=1) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + subvol_info = json.loads(self._get_subvolume_info(self.volname, clone)) + if len(subvol_info) == 0: + raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume") + for md in subvol_md: + if md not in subvol_info.keys(): + raise RuntimeError("%s not present in the metadata of subvolume" % md) + if subvol_info["type"] != "clone": + raise RuntimeError("type should be set to clone") + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_without_snapshot_clone(self): + """ + Verify subvolume snapshot info output without cloning snapshot. + If no clone is performed then path /volumes/_index/clone/{track_id} + will not exist. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + self.assertEqual(result['has_pending_clones'], "no") + self.assertFalse('orphan_clones_count' in result) + self.assertFalse('pending_clones' in result) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_no_clone_pending(self): + """ + Verify subvolume snapshot info output if no clone is in pending state. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clones status + for clone in clone_list: + self._wait_for_clone_to_complete(clone) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + self.assertEqual(result['has_pending_clones'], "no") + self.assertFalse('orphan_clones_count' in result) + self.assertFalse('pending_clones' in result) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in clone_list: + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_clone_pending_for_no_group(self): + """ + Verify subvolume snapshot info output if clones are in pending state. + Clones are not specified for particular target_group. Hence target_group + should not be in the output as we don't show _nogroup (default group) + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + expected_clone_list = [] + for clone in clone_list: + expected_clone_list.append({"name": clone}) + self.assertEqual(result['has_pending_clones'], "yes") + self.assertFalse('orphan_clones_count' in result) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 3) + + # check clones status + for clone in clone_list: + self._wait_for_clone_to_complete(clone) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in clone_list: + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_clone_pending_for_target_group(self): + """ + Verify subvolume snapshot info output if clones are in pending state. + Clones are not specified for target_group. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + group = self._generate_random_group_name() + target_group = self._generate_random_group_name() + + # create groups + self._fs_cmd("subvolumegroup", "create", self.volname, group) + self._fs_cmd("subvolumegroup", "create", self.volname, target_group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, + "--group_name", group, "--target_group_name", target_group) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot, "--group_name", group)) + + # verify snapshot info + expected_clone_list = [{"name": clone, "target_group": target_group}] + self.assertEqual(result['has_pending_clones'], "yes") + self.assertFalse('orphan_clones_count' in result) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 1) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=target_group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + self._fs_cmd("subvolume", "rm", self.volname, clone, target_group) + + # remove groups + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, target_group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_orphan_clone(self): + """ + Verify subvolume snapshot info output if orphan clones exists. + Orphan clones should not list under pending clones. + orphan_clones_count should display correct count of orphan clones' + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # remove track file for third clone to make it orphan + meta_path = os.path.join(".", "volumes", "_nogroup", subvolume, ".meta") + pending_clones_result = self.mount_a.run_shell(['sudo', 'grep', 'clone snaps', '-A3', meta_path], omit_sudo=False, stdout=StringIO(), stderr=StringIO()) + third_clone_track_id = pending_clones_result.stdout.getvalue().splitlines()[3].split(" = ")[0] + third_clone_track_path = os.path.join(".", "volumes", "_index", "clone", third_clone_track_id) + self.mount_a.run_shell(f"sudo rm -f {third_clone_track_path}", omit_sudo=False) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + expected_clone_list = [] + for i in range(len(clone_list)-1): + expected_clone_list.append({"name": clone_list[i]}) + self.assertEqual(result['has_pending_clones'], "yes") + self.assertEqual(result['orphan_clones_count'], 1) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 2) + + # check clones status + for i in range(len(clone_list)-1): + self._wait_for_clone_to_complete(clone_list[i]) + + # list snapshot info after cloning completion + res = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info (has_pending_clones should be no) + self.assertEqual(res['has_pending_clones'], "no") + + def test_non_clone_status(self): + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + try: + self._fs_cmd("clone", "status", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOTSUP: + raise RuntimeError("invalid error code when fetching status of a non cloned subvolume") + else: + raise RuntimeError("expected fetching of clone status of a subvolume to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_inherit_snapshot_namespace_and_size(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024*12 + + # create subvolume, in an isolated namespace with a specified size + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated", "--size", str(osize), "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # create a pool different from current subvolume pool + subvol_path = self._get_subvolume_path(self.volname, subvolume) + default_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + self.fs.add_data_pool(new_pool) + + # update source subvolume pool + 
self._do_subvolume_pool_and_namespace_update(subvolume, pool=new_pool, pool_namespace="") + + # schedule a clone, with NO --pool specification + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_inherit_quota_attrs(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024*12 + + # create subvolume with a specified size + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777", "--size", str(osize)) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # get subvolume path + subvolpath = self._get_subvolume_path(self.volname, subvolume) + + # set quota on number of files + self.mount_a.setfattr(subvolpath, 'ceph.quota.max_files', "20", sudo=True) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # get subvolume path + clonepath = self._get_subvolume_path(self.volname, clone) + + # verify quota max_files is inherited from source snapshot + subvol_quota = self.mount_a.getfattr(subvolpath, "ceph.quota.max_files") + clone_quota = self.mount_a.getfattr(clonepath, "ceph.quota.max_files") + self.assertEqual(subvol_quota, clone_quota) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_getpath(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # clone should not be accessible right now + try: + self._get_subvolume_path(self.volname, clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when fetching path of an pending clone") + else: + raise RuntimeError("expected fetching path of an pending clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = 
self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_source(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # verify clone source + result = json.loads(self._fs_cmd("clone", "status", self.volname, clone)) + source = result['status']['source'] + self.assertEqual(source['volume'], self.volname) + self.assertEqual(source['subvolume'], subvolume) + self.assertEqual(source.get('group', None), None) + self.assertEqual(source['snapshot'], snapshot) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + 
+ # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_retain_snapshot_with_snapshots(self): + """ + retain snapshots of a cloned subvolume and check disallowed operations + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol1_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot1, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot1, clone, subvol_path=subvol1_path) + + # create a snapshot on the clone + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot2) + + # retain a clone + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # list snapshots + clonesnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, clone)) + self.assertEqual(len(clonesnapshotls), 1, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = [snapshot['name'] for snapshot in clonesnapshotls] + for snap in [snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + ## check disallowed operations on retained clone + # clone-status + try: + self._fs_cmd("clone", "status", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone status of clone with retained snapshots") + else: + self.fail("expected clone status of clone with retained snapshots to fail") + + # clone-cancel + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone cancel of clone with retained snapshots") + else: + self.fail("expected clone cancel of clone with retained snapshots to fail") + + # remove snapshots (removes subvolumes as all are in retained state) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone(self): + """ + clone a snapshot from a snapshot retained subvolume + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol_path 
= self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, subvol_path=subvol_path) + + # remove snapshots (removes retained volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone_from_newer_snapshot(self): + """ + clone a subvolume from recreated subvolume's latest snapshot + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name(1) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # get and store path for clone verification + subvol2_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot newer subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume's newer snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot2, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot2, clone, subvol_path=subvol2_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_recreate(self): + """ + recreate a subvolume from one of its retained snapshots + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # do 
some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate retained subvolume using its own snapshot to clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, subvolume) + + # check clone status + self._wait_for_clone_to_complete(subvolume) + + # verify clone + self._verify_clone(subvolume, snapshot, subvolume, subvol_path=subvol_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate_clone(self): + """ + ensure retained clone recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # snapshot clone + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot) + + # remove clone with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(clone) + + # clone subvolume snapshot (recreate) + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of clone with purge pending") + else: + self.fail("expected recreate of clone with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(clone, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_attr_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io_mixed(subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, 
subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_pending_in_progress_complete(self): + """ + ensure failure status is not shown when clone is not in failed/cancelled state + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # pending clone shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for pending clone") + + # check clone1 to be in-progress + self._wait_for_clone_to_be_in_progress(clone1) + + # in-progress clone1 shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for in-progress clone") + + # wait for clone1 to complete + self._wait_for_clone_to_complete(clone1) + + # complete clone1 shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for complete clone") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_failed(self): + """ + ensure failure status is shown when clone is in failed state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # remove snapshot from backend to force the clone failure. 
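+        # (deleting the .snap directory directly on the backend bypasses the volumes
+        # plugin, so the cloner thread later fails with ENOENT when it tries to open
+        # the source snapshot; the failure status checked below reflects that.)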
+ snappath = os.path.join(".", "volumes", "_nogroup", subvolume, ".snap", snapshot) + self.mount_a.run_shell(['sudo', 'rmdir', snappath], omit_sudo=False) + + # wait for clone1 to fail. + self._wait_for_clone_to_fail(clone1) + + # check clone1 status + clone1_result = self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "failed") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "2") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "snapshot '{0}' does not exist".format(snapshot)) + + # clone removal should succeed after failure, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_pending_cancelled(self): + """ + ensure failure status is shown when clone is cancelled during pending state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # cancel pending clone1 + self._fs_cmd("clone", "cancel", self.volname, clone1) + + # check clone1 status + clone1_result = self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "canceled") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "4") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation") + + # clone removal should succeed with force after cancelled, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_in_progress_cancelled(self): + """ + ensure failure status is shown when clone is cancelled during in-progress state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # wait for clone1 to be in-progress + self._wait_for_clone_to_be_in_progress(clone1) + + # cancel in-progess clone1 + self._fs_cmd("clone", "cancel", self.volname, clone1) + + # check clone1 status + clone1_result = 
self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "canceled") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "4") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation") + + # clone removal should succeed with force after cancelled, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_quota_exceeded(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume with 20MB quota + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + self._fs_cmd("subvolume", "create", self.volname, subvolume,"--mode=777", "--size", str(osize)) + + # do IO, write 50 files of 1MB each to exceed quota. This mostly succeeds as quota enforcement takes time. + try: + self._do_subvolume_io(subvolume, number_of_files=50) + except CommandFailedError: + # ignore quota enforcement error. + pass + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_in_complete_clone_rm(self): + """ + Validates the removal of clone when it is not in 'complete|cancelled|failed' state. + The forceful removl of subvolume clone succeeds only if it's in any of the + 'complete|cancelled|failed' states. It fails with EAGAIN in any other states. 
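+        An in-progress clone must therefore be cancelled (or allowed to complete
+        or fail) before its removal can succeed.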
+ """ + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # Use --force since clone is not complete. Returns EAGAIN as clone is not either complete or cancelled. + try: + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when trying to remove failed clone") + else: + raise RuntimeError("expected error when removing a failed clone") + + # cancel on-going clone + self._fs_cmd("clone", "cancel", self.volname, clone) + + # verify canceled state + self._check_clone_canceled(clone) + + # clone removal should succeed after cancel + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_retain_suid_guid(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # Create a file with suid, guid bits set along with executable bit. 
args = ["subvolume", "getpath", self.volname, subvolume] + args = tuple(args) + subvolpath = self._fs_cmd(*args) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline + + file_path = subvolpath + file_path = os.path.join(subvolpath, "test_suid_file") + self.mount_a.run_shell(["touch", file_path]) + self.mount_a.run_shell(["chmod", "u+sx,g+sx", file_path]) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_and_reclone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # check clone status + self._wait_for_clone_to_complete(clone1) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # now the clone is just like a normal subvolume -- snapshot the clone and fork + # another clone. before that, do some IO so it can be differentiated.
+ self._do_subvolume_io(clone1, create_dir="data", number_of_files=32) + + # snapshot clone -- use same snap name + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone1, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, clone1, snapshot, clone2) + + # check clone status + self._wait_for_clone_to_complete(clone2) + + # verify clone + self._verify_clone(clone1, snapshot, clone2) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_in_progress(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=128) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # cancel on-going clone + self._fs_cmd("clone", "cancel", self.volname, clone) + + # verify canceled state + self._check_clone_canceled(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_pending(self): + """ + this test is a bit more involved compared to canceling an in-progress clone. + we'd need to ensure that a to-be canceled clone has still not been picked up + by cloner threads. exploit the fact that clones are picked up in an FCFS + fashion and there are four (4) cloner threads by default. When the number of + cloner threads increase, this test _may_ start tripping -- so, the number of + clone operations would need to be jacked up. 
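The two mgr/volumes knobs this docstring leans on -- the cloner thread count and the delay before a clone starts -- are ordinary config options that the tests set through self.config_set(). A minimal standalone sketch of doing the same outside teuthology, assuming a working `ceph` CLI on the PATH (values are illustrative only):

    import subprocess

    def tune_clone_knobs(max_clones: int = 2, start_delay_secs: int = 2) -> None:
        """Lower cloner concurrency and delay clone start, mirroring config_set() in the tests."""
        for key, val in (("mgr/volumes/max_concurrent_clones", max_clones),
                         ("mgr/volumes/snapshot_clone_delay", start_delay_secs)):
            subprocess.run(["ceph", "config", "set", "mgr", key, str(val)], check=True)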
+ """ + # default number of clone threads + NR_THREADS = 4 + # good enough for 4 threads + NR_CLONES = 5 + # yeh, 1gig -- we need the clone to run for sometime + FILE_SIZE_MB = 1024 + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clones = self._generate_random_clone_name(NR_CLONES) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=4, file_size=FILE_SIZE_MB) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule clones + for clone in clones: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + to_wait = clones[0:NR_THREADS] + to_cancel = clones[NR_THREADS:] + + # cancel pending clones and verify + for clone in to_cancel: + status = json.loads(self._fs_cmd("clone", "status", self.volname, clone)) + self.assertEqual(status["status"]["state"], "pending") + self._fs_cmd("clone", "cancel", self.volname, clone) + self._check_clone_canceled(clone) + + # let's cancel on-going clones. handle the case where some of the clones + # _just_ complete + for clone in list(to_wait): + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + to_cancel.append(clone) + to_wait.remove(clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError("invalid error code when cancelling on-going clone") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in to_wait: + self._fs_cmd("subvolume", "rm", self.volname, clone) + for clone in to_cancel: + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_different_groups(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + s_group, c_group = self._generate_random_group_name(2) + + # create groups + self._fs_cmd("subvolumegroup", "create", self.volname, s_group) + self._fs_cmd("subvolumegroup", "create", self.volname, c_group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, s_group, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, subvolume_group=s_group, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, s_group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, + '--group_name', s_group, '--target_group_name', c_group) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=c_group) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=s_group, clone_group=c_group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, s_group) + self._fs_cmd("subvolume", "rm", self.volname, clone, c_group) + + # remove groups + self._fs_cmd("subvolumegroup", "rm", self.volname, s_group) + self._fs_cmd("subvolumegroup", "rm", self.volname, c_group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def 
test_subvolume_snapshot_clone_fail_with_remove(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + + pool_capacity = 32 * 1024 * 1024 + # number of files required to fill up 99% of the pool + nr_files = int((pool_capacity * 0.99) / (TestVolumes.DEFAULT_FILE_SIZE * 1024 * 1024)) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=nr_files) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # add data pool + new_pool = "new_pool" + self.fs.add_data_pool(new_pool) + + self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", new_pool, + "max_bytes", "{0}".format(pool_capacity // 4)) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1, "--pool_layout", new_pool) + + # check clone status -- this should dramatically overshoot the pool quota + self._wait_for_clone_to_complete(clone1) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1, clone_pool=new_pool) + + # wait a bit so that subsequent I/O will give pool full error + time.sleep(120) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2, "--pool_layout", new_pool) + + # check clone status + self._wait_for_clone_to_fail(clone2) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + try: + self._fs_cmd("subvolume", "rm", self.volname, clone2) + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when trying to remove failed clone") + else: + raise RuntimeError("expected error when removing a failed clone") + + # ... 
and with force, failed clone can be removed + self._fs_cmd("subvolume", "rm", self.volname, clone2, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_on_existing_subvolumes(self): + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolumes + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--mode=777") + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume1, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume1, snapshot) + + # schedule a clone with target as subvolume2 + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, subvolume2) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError("invalid error code when cloning to existing subvolume") + else: + raise RuntimeError("expected cloning to fail if the target is an existing subvolume") + + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone) + + # schedule a clone with target as clone + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError("invalid error code when cloning to existing clone") + else: + raise RuntimeError("expected cloning to fail if the target is an existing clone") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume1, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_pool_layout(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # add data pool + new_pool = "new_pool" + newid = self.fs.add_data_pool(new_pool) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, "--pool_layout", new_pool) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_pool=new_pool) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + subvol_path = self._get_subvolume_path(self.volname, clone) + desired_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify 
trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_under_group(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + group = self._generate_random_group_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--target_group_name', group) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=group) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_group=group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone, group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_attrs(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + mode = "777" + uid = "1000" + gid = "1000" + new_uid = "1001" + new_gid = "1001" + new_mode = "700" + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # change subvolume attrs (to ensure clone picks up snapshot attrs) + self._do_subvolume_attr_update(subvolume, new_uid, new_gid, new_mode) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_upgrade(self): + """ + yet another poor man's upgrade test -- rather than going through a full + upgrade cycle, emulate old types subvolumes by going through the wormhole + and verify clone operation. + further ensure that a legacy volume is not updated to v2, but clone is. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # emulate a old-fashioned subvolume + createpath = os.path.join(".", "volumes", "_nogroup", subvolume) + self.mount_a.run_shell_payload(f"sudo mkdir -p -m 777 {createpath}", omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1, legacy=True) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_version=1) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # ensure metadata file is in v2 location, with required version v2 + self._assert_meta_location_and_version(self.volname, clone) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_reconf_max_concurrent_clones(self): + """ + Validate 'max_concurrent_clones' config option + """ + + # get the default number of cloner threads + default_max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(default_max_concurrent_clones, 4) + + # Increase number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 6) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 6) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + def test_subvolume_snapshot_config_snapshot_clone_delay(self): + """ + Validate 'snapshot_clone_delay' config option + """ + + # get the default delay before starting the clone + default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay')) + self.assertEqual(default_timeout, 0) + + # Insert delay of 2 seconds at the beginning of the snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay')) + self.assertEqual(default_timeout, 2) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + 
max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + def test_subvolume_under_group_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, subvolume_group=group, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', group) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + +class TestMisc(TestVolumesHelper): + """Miscellaneous tests related to FS volume, subvolume group, and subvolume operations.""" + def test_connection_expiration(self): + # unmount any cephfs mounts + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].umount_wait() + sessions = self._session_list() + self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted + + # Get the mgr to definitely mount cephfs + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + sessions = self._session_list() + self.assertEqual(len(sessions), 1) + + # Now wait for the mgr to expire the connection: + self.wait_until_evicted(sessions[0]['id'], timeout=90) + + def test_mgr_eviction(self): + # unmount any cephfs mounts + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].umount_wait() + sessions = self._session_list() + self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted + + # Get the mgr to definitely mount cephfs + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + sessions = self._session_list() + self.assertEqual(len(sessions), 1) + + # Now fail the mgr, check the session was evicted + mgr = self.mgr_cluster.get_active_id() + self.mgr_cluster.mgr_fail(mgr) + self.wait_until_evicted(sessions[0]['id']) + + def test_names_can_only_be_goodchars(self): + """ + Test the creating vols, subvols subvolgroups fails when their names uses + characters beyond [a-zA-Z0-9 -_.]. 
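As a quick self-contained illustration of the naming rule stated above (the real validation lives in the mgr/volumes plugin and may differ in detail), a name passes only if it sticks to the listed characters:

    import re

    # character class quoted from the docstring; '-' moved to the end so it
    # is a literal rather than a range
    GOODCHARS = re.compile(r'^[a-zA-Z0-9 ._-]+$')

    assert GOODCHARS.match('testvol')       # accepted by the test below
    assert not GOODCHARS.match('abcd@#')    # rejected below with CommandFailedError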
+ """ + volname, badname = 'testvol', 'abcd@#' + + with self.assertRaises(CommandFailedError): + self._fs_cmd('volume', 'create', badname) + self._fs_cmd('volume', 'create', volname) + + with self.assertRaises(CommandFailedError): + self._fs_cmd('subvolumegroup', 'create', volname, badname) + + with self.assertRaises(CommandFailedError): + self._fs_cmd('subvolume', 'create', volname, badname) + self._fs_cmd('volume', 'rm', volname, '--yes-i-really-mean-it') + + def test_subvolume_ops_on_nonexistent_vol(self): + # tests the fs subvolume operations on non existing volume + + volname = "non_existent_subvolume" + + # try subvolume operations + for op in ("create", "rm", "getpath", "info", "resize", "pin", "ls"): + try: + if op == "resize": + self._fs_cmd("subvolume", "resize", volname, "subvolname_1", "inf") + elif op == "pin": + self._fs_cmd("subvolume", "pin", volname, "subvolname_1", "export", "1") + elif op == "ls": + self._fs_cmd("subvolume", "ls", volname) + else: + self._fs_cmd("subvolume", op, volname, "subvolume_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolume {0}' command to fail".format(op)) + + # try subvolume snapshot operations and clone create + for op in ("create", "rm", "info", "protect", "unprotect", "ls", "clone"): + try: + if op == "ls": + self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1") + elif op == "clone": + self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1", "snapshot_1", "clone_1") + else: + self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1", "snapshot_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolume snapshot {0}' command to fail".format(op)) + + # try, clone status + try: + self._fs_cmd("clone", "status", volname, "clone_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs clone status' command to fail") + + # try subvolumegroup operations + for op in ("create", "rm", "getpath", "pin", "ls"): + try: + if op == "pin": + self._fs_cmd("subvolumegroup", "pin", volname, "group_1", "export", "0") + elif op == "ls": + self._fs_cmd("subvolumegroup", op, volname) + else: + self._fs_cmd("subvolumegroup", op, volname, "group_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolumegroup {0}' command to fail".format(op)) + + # try subvolumegroup snapshot operations + for op in ("create", "rm", "ls"): + try: + if op == "ls": + self._fs_cmd("subvolumegroup", "snapshot", op, volname, "group_1") + else: + self._fs_cmd("subvolumegroup", "snapshot", op, volname, "group_1", "snapshot_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolumegroup snapshot {0}' command to fail".format(op)) + + def test_subvolume_upgrade_legacy_to_v1(self): + """ + poor man's upgrade test -- rather than going through a full upgrade cycle, + emulate subvolumes by going through the wormhole and verify if they are + accessible. + further ensure that a legacy volume is not updated to v2. 
+ """ + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume -- one in the default group and + # the other in a custom group + createpath1 = os.path.join(".", "volumes", "_nogroup", subvolume1) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False) + + # create group + createpath2 = os.path.join(".", "volumes", group, subvolume2) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath2], omit_sudo=False) + + # this would auto-upgrade on access without anyone noticing + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume1) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() # remove "/" prefix and any trailing newline + + subvolpath2 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume2, group) + self.assertNotEqual(subvolpath2, None) + subvolpath2 = subvolpath2.rstrip() # remove "/" prefix and any trailing newline + + # and... the subvolume path returned should be what we created behind the scene + self.assertEqual(createpath1[1:], subvolpath1) + self.assertEqual(createpath2[1:], subvolpath2) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvolume1, version=1, legacy=True) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1, legacy=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_no_upgrade_v1_sanity(self): + """ + poor man's upgrade test -- theme continues... + + This test is to ensure v1 subvolumes are retained as is, due to a snapshot being present, and runs through + a series of operations on the v1 subvolume to ensure they work as expected. 
+ """ + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid", "features", "state"] + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + mode = "777" + uid = "1000" + gid = "1000" + + # emulate a v1 subvolume -- in the default group + subvolume_path = self._create_v1_subvolume(subvolume) + + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + self.assertEqual(subvolpath, subvolume_path) + + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) + + # info + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'complete', found '{0}".format(subvol_info["state"])) + self.assertEqual(len(subvol_info["features"]), 2, + msg="expected 1 feature, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + # resize + nsize = self.DEFAULT_FILE_SIZE*1024*1024*10 + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + + # create (idempotent) (change some attrs, to ensure attrs are preserved from the snapshot on clone) + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # snap-create + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # check clone status + self._wait_for_clone_to_complete(clone1) + + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone1, version=2) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1, source_version=1) + + # clone (older snapshot) + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, 'fake', clone2) + + # check clone status + self._wait_for_clone_to_complete(clone2) + + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone2, version=2) + + # verify clone + # TODO: rentries will mismatch till this is fixed https://tracker.ceph.com/issues/46747 + #self._verify_clone(subvolume, 'fake', clone2, source_version=1) + + # snap-info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, 
snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-ls + subvol_snapshots = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + self.assertEqual(len(subvol_snapshots), 2, "subvolume ls count mismatch, expected 2', found {0}".format(len(subvol_snapshots))) + snapshotnames = [snapshot['name'] for snapshot in subvol_snapshots] + for name in [snapshot, 'fake']: + self.assertIn(name, snapshotnames, msg="expected snapshot '{0}' in subvolume snapshot ls".format(name)) + + # snap-rm + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, "fake") + + # ensure volume is still at version 1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1) + + # rm + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_no_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... + ensure v1 to v2 upgrades are not done automatically due to various states of v1 + """ + subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group) + + # emulate a v1 subvolume -- in a clone pending state + self._create_v1_subvolume(subvolume3, subvol_type='clone', has_snapshot=False, state='pending') + + # this would attempt auto-upgrade on access, but fail to do so as snapshots exist + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # this would attempt auto-upgrade on access, but fail to do so as volume is not complete + # use clone status, as only certain operations are allowed in pending state + status = json.loads(self._fs_cmd("clone", "status", self.volname, subvolume3)) + self.assertEqual(status["status"]["state"], "pending") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, "fake") + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume2, "fake", group) + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume1, version=1) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume3) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on rm of subvolume undergoing clone") + else: + self.fail("expected rm of subvolume undergoing clone to fail") + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume3, version=1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume3, 
"--force") + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... + ensure v1 to v2 upgrades work + """ + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group, has_snapshot=False) + + # this would attempt auto-upgrade on access + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # ensure metadata file is in v2 location, with version retained as v2 + self._assert_meta_location_and_version(self.volname, subvolume1, version=2) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_malicious_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate handcrafted .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... + """ + subvol1, subvol2 = self._generate_random_subvolume_name(2) + + # emulate a old-fashioned subvolume in the default group + createpath1 = os.path.join(".", "volumes", "_nogroup", subvol1) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath1, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # create v2 subvolume + self._fs_cmd("subvolume", "create", self.volname, subvol2) + + # Create malicious .meta file in legacy subvolume root. Copy v2 subvolume + # .meta into legacy subvol1's root + subvol2_metapath = os.path.join(".", "volumes", "_nogroup", subvol2, ".meta") + self.mount_a.run_shell(['sudo', 'cp', subvol2_metapath, createpath1], omit_sudo=False) + + # Upgrade legacy subvol1 to v1 + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvol1) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() + + # the subvolume path returned should not be of subvol2 from handcrafted + # .meta file + self.assertEqual(createpath1[1:], subvolpath1) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol1, version=1, legacy=True) + + # Authorize alice authID read-write access to subvol1. 
Verify it authorizes subvol1 path and not subvol2 + # path whose '.meta' file is copied to subvol1 root + authid1 = "alice" + self._fs_cmd("subvolume", "authorize", self.volname, subvol1, authid1) + + # Validate that the mds path added is of subvol1 and not of subvol2 + out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.alice", "--format=json-pretty")) + self.assertEqual("client.alice", out[0]["entity"]) + self.assertEqual("allow rw path={0}".format(createpath1[1:]), out[0]["caps"]["mds"]) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol1) + self._fs_cmd("subvolume", "rm", self.volname, subvol2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_binary_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate binary .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... + """ + subvol = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume -- in a custom group + createpath = os.path.join(".", "volumes", group, subvol) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # Create unparseable binary .meta file on legacy subvol's root + meta_contents = os.urandom(4096) + meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True) + + # Upgrade legacy subvol to v1 + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath.rstrip() + + # The legacy subvolume path should be returned for subvol. + # Should ignore unparseable binary .meta file in subvol's root + self.assertEqual(createpath[1:], subvolpath) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol, subvol_group=group, version=1, legacy=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_unparseable_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate unparseable text .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... 
+ """ + subvol = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume -- in a custom group + createpath = os.path.join(".", "volumes", group, subvol) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # Create unparseable text .meta file on legacy subvol's root + meta_contents = "unparseable config\nfile ...\nunparseable config\nfile ...\n" + meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True) + + # Upgrade legacy subvol to v1 + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath.rstrip() + + # The legacy subvolume path should be returned for subvol. + # Should ignore the unparseable text .meta file in subvol's root + self.assertEqual(createpath[1:], subvolpath) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol, subvol_group=group, version=1, legacy=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + +class TestPerModuleFinsherThread(TestVolumesHelper): + """ + Per module finisher thread tests related to mgr/volume cmds. + This is used in conjunction with check_counter with min val being 4 + as four subvolume cmds are run + """ + def test_volumes_module_finisher_thread(self): + subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol3, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() diff --git a/qa/tasks/cephfs/xfstests_dev.py b/qa/tasks/cephfs/xfstests_dev.py new file mode 100644 index 000000000..cbb344305 --- /dev/null +++ b/qa/tasks/cephfs/xfstests_dev.py @@ -0,0 +1,303 @@ +from io import StringIO +from logging import getLogger +from os import getcwd as os_getcwd +from os.path import join +from textwrap import dedent + + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.kernel_mount import KernelMount + + +log = getLogger(__name__) + + +# TODO: add code to run non-ACL tests too. +# TODO: make xfstests-dev tests run without running `make install`.
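XFSTestsDev below only prepares the environment (clones xfstests-dev, installs dependencies, writes local.config and ceph.exclude); concrete tests come from subclasses. A hypothetical sketch of such a subclass -- not part of this file, and the real drivers under qa/tasks/cephfs may invoke the check script differently:

    from tasks.cephfs.xfstests_dev import XFSTestsDev

    class TestXFSTestsExample(XFSTestsDev):
        def test_generic_001(self):
            # run a single xfstests-dev case from the root of the cloned repo
            self.mount_a.client_remote.run(args=['sudo', './check', 'generic/001'],
                                           cwd=self.xfstests_repo_path,
                                           omit_sudo=False)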
+class XFSTestsDev(CephFSTestCase): + + RESULTS_DIR = "results" + + def setUp(self): + super(XFSTestsDev, self).setUp() + self.setup_xfsprogs_devs() + self.prepare_xfstests_devs() + + def setup_xfsprogs_devs(self): + self.install_xfsprogs = False + + def prepare_xfstests_devs(self): + # NOTE: To run a quick test with vstart_runner.py, enable the next line + # and disable calls to get_repo(), install_deps(), and + # build_and_install() and also disable lines in tearDown() for repo + # deletion. + #self.xfstests_repo_path = '/path/to/xfstests-dev' + + self.get_repos() + self.get_test_and_scratch_dirs_ready() + self.install_deps() + self.create_reqd_users() + self.write_local_config() + self.write_ceph_exclude() + self.build_and_install() + + def tearDown(self): + self.del_users_and_groups() + self.del_repos() + super(XFSTestsDev, self).tearDown() + + def del_users_and_groups(self): + self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force', + '--remove', 'fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force', + '--remove', '123456-fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'groupdel', 'fsgqa'], + omit_sudo=False, check_status=False) + + def del_repos(self): + self.save_results_dir() + self.mount_a.client_remote.run(args=f'sudo rm -rf {self.xfstests_repo_path}', + omit_sudo=False, check_status=False) + + if self.install_xfsprogs: + self.mount_a.client_remote.run(args=f'sudo rm -rf {self.xfsprogs_repo_path}', + omit_sudo=False, check_status=False) + + def save_results_dir(self): + """ + When tests in xfstests-dev repo are executed, logs are created and + saved under a directory named "results" that lies at the repo root. + In case a test from xfstests-dev repo fails, these logs will help find + the cause of the failure. + + Since there's no option in teuthology to copy a directory lying at a + custom location in order to save it from teuthology test runner's tear + down, let's copy this directory to a standard location that teuthology + copies away before erasing all data on the test machine. The standard + location chosen here is the Ceph log directory. + + In case of vstart_runner.py, this method does nothing. + """ + # No need to save results dir in case of vstart_runner.py. + for x in ('LocalFuseMount', 'LocalKernelMount'): + if x in self.mount_a.__class__.__name__: + return + + src = join(self.xfstests_repo_path, self.RESULTS_DIR) + + if self.mount_a.run_shell(f'sudo stat {src}', + check_status=False, omit_sudo=False).returncode != 0: + log.info(f'xfstests-dev repo contains no directory named ' + f'"{self.RESULTS_DIR}". repo location: {self.xfstests_repo_path}') + return + + std_loc = '/var/log/ceph' # standard location + dst = join(std_loc, 'xfstests-dev-results') + self.mount_a.run_shell(f'sudo mkdir -p {dst}', omit_sudo=False) + self.mount_a.run_shell(f'sudo cp -r {src} {dst}', omit_sudo=False) + log.info(f'results dir from xfstests-dev has been saved; it was ' + f'copied from {self.xfstests_repo_path} to {std_loc}.') + + def build_and_install(self): + # NOTE: On teuthology machines it's necessary to run "make" as + # superuser since the repo is cloned somewhere in /tmp.
+ self.mount_a.client_remote.run(args=['sudo', 'make'], + cwd=self.xfstests_repo_path, stdout=StringIO(), + stderr=StringIO()) + self.mount_a.client_remote.run(args=['sudo', 'make', 'install'], + cwd=self.xfstests_repo_path, omit_sudo=False, + stdout=StringIO(), stderr=StringIO()) + + if self.install_xfsprogs: + self.mount_a.client_remote.run(args=['sudo', 'make'], + cwd=self.xfsprogs_repo_path, + stdout=StringIO(), stderr=StringIO()) + self.mount_a.client_remote.run(args=['sudo', 'make', 'install'], + cwd=self.xfsprogs_repo_path, omit_sudo=False, + stdout=StringIO(), stderr=StringIO()) + + def get_repos(self): + """ + Clone xfstests_dev and xfsprogs-dev repositories. If already present, + update them. The xfsprogs-dev will be used to test the encrypt. + """ + # TODO: make sure that repo is not cloned for every test. it should + # happen only once. + remoteurl = 'https://git.ceph.com/xfstests-dev.git' + self.xfstests_repo_path = self.mount_a.client_remote.mkdtemp(suffix= + 'xfstests-dev') + self.mount_a.run_shell(['git', 'clone', remoteurl, '--depth', '1', + self.xfstests_repo_path]) + + if self.install_xfsprogs: + remoteurl = 'https://git.ceph.com/xfsprogs-dev.git' + self.xfsprogs_repo_path = self.mount_a.client_remote.mkdtemp(suffix= + 'xfsprogs-dev') + self.mount_a.run_shell(['git', 'clone', remoteurl, '--depth', '1', + self.xfsprogs_repo_path]) + + def get_admin_key(self): + import configparser + + cp = configparser.ConfigParser() + cp.read_string(self.fs.mon_manager.raw_cluster_cmd( + 'auth', 'get-or-create', 'client.admin')) + + return cp['client.admin']['key'] + + def get_test_and_scratch_dirs_ready(self): + """ "test" and "scratch" directories are directories inside Ceph FS. + And, test and scratch mounts are path on the local FS where "test" + and "scratch" directories would be mounted. Look at xfstests-dev + local.config's template inside this file to get some context. + """ + self.test_dirname = 'test' + self.mount_a.run_shell(['mkdir', self.test_dirname]) + # read var name as "test dir's mount path" + self.test_dirs_mount_path = self.mount_a.client_remote.mkdtemp( + suffix=self.test_dirname) + + self.scratch_dirname = 'scratch' + self.mount_a.run_shell(['mkdir', self.scratch_dirname]) + # read var name as "scratch dir's mount path" + self.scratch_dirs_mount_path = self.mount_a.client_remote.mkdtemp( + suffix=self.scratch_dirname) + + def install_deps(self): + from teuthology.misc import get_system_type + + distro, version = get_system_type(self.mount_a.client_remote, + distro=True, version=True) + distro = distro.lower() + major_ver_num = int(version.split('.')[0]) # only keep major release + # number + log.info(f'distro and version detected is "{distro}" and "{version}".') + + # we keep fedora here so that right deps are installed when this test + # is run locally by a dev. 
+ if distro in ('redhatenterpriseserver', 'redhatenterprise', 'fedora', + 'centos', 'centosstream', 'rhel'): + deps = """acl attr automake bc dbench dump e2fsprogs fio \ + gawk gcc indent libtool lvm2 make psmisc quota sed \ + xfsdump xfsprogs \ + libacl-devel libattr-devel libaio-devel libuuid-devel \ + xfsprogs-devel btrfs-progs-devel python3 sqlite""".split() + + if self.install_xfsprogs: + if distro == 'centosstream' and major_ver_num == 8: + deps += ['--enablerepo=powertools'] + deps += ['inih-devel', 'userspace-rcu-devel', 'libblkid-devel', + 'gettext', 'libedit-devel', 'libattr-devel', + 'device-mapper-devel', 'libicu-devel'] + + deps_old_distros = ['xfsprogs-qa-devel'] + + if distro != 'fedora' and major_ver_num > 7: + deps.remove('btrfs-progs-devel') + + args = ['sudo', 'yum', 'install', '-y'] + deps + deps_old_distros + elif distro == 'ubuntu': + deps = """xfslibs-dev uuid-dev libtool-bin \ + e2fsprogs automake gcc libuuid1 quota attr libattr1-dev make \ + libacl1-dev libaio-dev xfsprogs libgdbm-dev gawk fio dbench \ + uuid-runtime python sqlite3""".split() + + if self.install_xfsprogs: + deps += ['libinih-dev', 'liburcu-dev', 'libblkid-dev', + 'gettext', 'libedit-dev', 'libattr1-dev', + 'libdevmapper-dev', 'libicu-dev', 'pkg-config'] + + if major_ver_num >= 19: + deps[deps.index('python')] ='python2' + args = ['sudo', 'apt-get', 'install', '-y'] + deps + else: + raise RuntimeError('expected a yum based or a apt based system') + + self.mount_a.client_remote.run(args=args, omit_sudo=False) + + def create_reqd_users(self): + self.mount_a.client_remote.run(args=['sudo', 'useradd', '-m', 'fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'groupadd', 'fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'useradd', 'fsgqa2'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'useradd', + '123456-fsgqa'], omit_sudo=False, + check_status=False) + + def write_local_config(self, options=None): + if isinstance(self.mount_a, KernelMount): + conf_contents = self._gen_conf_for_kernel_mnt(options) + elif isinstance(self.mount_a, FuseMount): + conf_contents = self._gen_conf_for_fuse_mnt(options) + + self.mount_a.client_remote.write_file(join(self.xfstests_repo_path, + 'local.config'), + conf_contents, sudo=True) + log.info(f'local.config\'s contents -\n{conf_contents}') + + def _gen_conf_for_kernel_mnt(self, options=None): + """ + Generate local.config for CephFS kernel client. + """ + _options = '' if not options else ',' + options + mon_sock = self.fs.mon_manager.get_msgrv1_mon_socks()[0] + test_dev = mon_sock + ':/' + self.test_dirname + scratch_dev = mon_sock + ':/' + self.scratch_dirname + + return dedent(f'''\ + export FSTYP=ceph + export TEST_DEV={test_dev} + export TEST_DIR={self.test_dirs_mount_path} + export SCRATCH_DEV={scratch_dev} + export SCRATCH_MNT={self.scratch_dirs_mount_path} + export CEPHFS_MOUNT_OPTIONS="-o name=admin,secret={self.get_admin_key()}{_options}" + ''') + + def _gen_conf_for_fuse_mnt(self, options=None): + """ + Generate local.config for CephFS FUSE client. + """ + mon_sock = self.fs.mon_manager.get_msgrv1_mon_socks()[0] + test_dev = 'ceph-fuse' + scratch_dev = '' + # XXX: Please note that ceph_fuse_bin_path is not ideally required + # because ceph-fuse binary ought to be present in one of the standard + # locations during teuthology tests. 
But then testing with + # vstart_runner.py will not work since ceph-fuse binary won't be + # present in a standard locations during these sessions. Thus, this + # workaround. + ceph_fuse_bin_path = 'ceph-fuse' # bin expected to be in env + if 'LocalFuseMount' in str(type(self.mount_a)): # for vstart_runner.py runs + ceph_fuse_bin_path = join(os_getcwd(), 'bin', 'ceph-fuse') + + keyring_path = self.mount_a.client_remote.mktemp( + data=self.fs.mon_manager.get_keyring('client.admin')+'\n') + + lastline = (f'export CEPHFS_MOUNT_OPTIONS="-m {mon_sock} -k ' + f'{keyring_path} --client_mountpoint /{self.test_dirname}') + lastline += f'-o {options}"' if options else '"' + + return dedent(f'''\ + export FSTYP=ceph-fuse + export CEPH_FUSE_BIN_PATH={ceph_fuse_bin_path} + export TEST_DEV={test_dev} # without this tests won't get started + export TEST_DIR={self.test_dirs_mount_path} + export SCRATCH_DEV={scratch_dev} + export SCRATCH_MNT={self.scratch_dirs_mount_path} + {lastline} + ''') + + def write_ceph_exclude(self): + # These tests will fail or take too much time and will + # make the test timedout, just skip them for now. + xfstests_exclude_contents = dedent('''\ + {c}/001 {g}/003 {g}/020 {g}/075 {g}/317 {g}/538 {g}/531 + ''').format(g="generic", c="ceph") + + self.mount_a.client_remote.write_file(join(self.xfstests_repo_path, 'ceph.exclude'), + xfstests_exclude_contents, sudo=True) diff --git a/qa/tasks/cephfs_mirror.py b/qa/tasks/cephfs_mirror.py new file mode 100644 index 000000000..9602a5a7f --- /dev/null +++ b/qa/tasks/cephfs_mirror.py @@ -0,0 +1,73 @@ +""" +Task for running cephfs mirror daemons +""" + +import logging + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.task import Task +from tasks.ceph_manager import get_valgrind_args +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + +class CephFSMirror(Task): + def __init__(self, ctx, config): + super(CephFSMirror, self).__init__(ctx, config) + self.log = log + + def setup(self): + super(CephFSMirror, self).setup() + try: + self.client = self.config['client'] + except KeyError: + raise ConfigError('cephfs-mirror requires a client to connect') + + self.cluster_name, type_, self.client_id = misc.split_role(self.client) + if not type_ == 'client': + raise ConfigError(f'client role {self.client} must be a client') + self.remote = get_remote_for_role(self.ctx, self.client) + + def begin(self): + super(CephFSMirror, self).begin() + testdir = misc.get_testdir(self.ctx) + + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + ] + + if 'valgrind' in self.config: + args = get_valgrind_args( + testdir, 'cephfs-mirror-{id}'.format(id=self.client), + args, self.config.get('valgrind')) + + args.extend([ + 'cephfs-mirror', + '--cluster', + self.cluster_name, + '--id', + self.client_id, + ]) + if 'run_in_foreground' in self.config: + args.extend(['--foreground']) + + self.ctx.daemons.add_daemon( + self.remote, 'cephfs-mirror', self.client, + args=args, + logger=self.log.getChild(self.client), + stdin=run.PIPE, + wait=False, + ) + + def end(self): + mirror_daemon = self.ctx.daemons.get_daemon('cephfs-mirror', self.client) + mirror_daemon.stop() + super(CephFSMirror, self).end() + +task = CephFSMirror diff --git a/qa/tasks/cephfs_mirror_thrash.py b/qa/tasks/cephfs_mirror_thrash.py new file mode 100644 index 000000000..91f60ac50 --- /dev/null +++ 
b/qa/tasks/cephfs_mirror_thrash.py @@ -0,0 +1,219 @@ +""" +Task for thrashing cephfs-mirror daemons +""" + +import contextlib +import logging +import random +import signal +import socket +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + + +class CephFSMirrorThrasher(Thrasher, Greenlet): + """ + CephFSMirrorThrasher:: + + The CephFSMirrorThrasher thrashes cephfs-mirror daemons during execution of other + tasks (workunits, etc). + + The config is optional. Many of the config parameters are a maximum value + to use when selecting a random value from a range. The config is a dict + containing some or all of: + + cluster: [default: ceph] cluster to thrash + + max_thrash: [default: 1] the maximum number of active cephfs-mirror daemons per + cluster will be thrashed at any given time. + + min_thrash_delay: [default: 60] minimum number of seconds to delay before + thrashing again. + + max_thrash_delay: [default: 120] maximum number of seconds to delay before + thrashing again. + + max_revive_delay: [default: 10] maximum number of seconds to delay before + bringing back a thrashed cephfs-mirror daemon. + + randomize: [default: true] enables randomization and use the max/min values + + seed: [no default] seed the random number generator + + Examples:: + + The following example disables randomization, and uses the max delay + values: + + tasks: + - ceph: + - cephfs_mirror_thrash: + randomize: False + max_thrash_delay: 10 + """ + + def __init__(self, ctx, config, cluster, daemons): + super(CephFSMirrorThrasher, self).__init__() + + self.ctx = ctx + self.config = config + self.cluster = cluster + self.daemons = daemons + + self.logger = log + self.name = 'thrasher.cephfs_mirror.[{cluster}]'.format(cluster = cluster) + self.stopping = Event() + + self.randomize = bool(self.config.get('randomize', True)) + self.max_thrash = int(self.config.get('max_thrash', 1)) + self.min_thrash_delay = float(self.config.get('min_thrash_delay', 5.0)) + self.max_thrash_delay = float(self.config.get('max_thrash_delay', 10)) + self.max_revive_delay = float(self.config.get('max_revive_delay', 15.0)) + + def _run(self): + try: + self.do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. 
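+ # (set_thrasher_exception() records the error on the thrasher; the
+ # task() context manager below checks thrasher.exception on exit and
+ # raises, so a thrashing failure still fails the overall run.)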
+ + def log(self, x): + """Write data to logger assigned to this CephFSMirrorThrasher""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def do_thrash(self): + """ + Perform the random thrashing action + """ + + self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster)) + stats = { + "kill": 0, + } + + while not self.stopping.is_set(): + delay = self.max_thrash_delay + if self.randomize: + delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay) + + if delay > 0.0: + self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) + self.stopping.wait(delay) + if self.stopping.is_set(): + continue + + killed_daemons = [] + + weight = 1.0 / len(self.daemons) + count = 0 + for daemon in self.daemons: + skip = random.uniform(0.0, 1.0) + if weight <= skip: + self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format( + label=daemon.id_, skip=skip, weight=weight)) + continue + + self.log('kill {label}'.format(label=daemon.id_)) + try: + daemon.signal(signal.SIGTERM) + except Exception as e: + self.log(f'exception when stopping mirror daemon: {e}') + else: + killed_daemons.append(daemon) + stats['kill'] += 1 + + # if we've reached max_thrash, we're done + count += 1 + if count >= self.max_thrash: + break + + if killed_daemons: + # wait for a while before restarting + delay = self.max_revive_delay + if self.randomize: + delay = random.randrange(0.0, self.max_revive_delay) + + self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay)) + sleep(delay) + + for daemon in killed_daemons: + self.log('waiting for {label}'.format(label=daemon.id_)) + try: + run.wait([daemon.proc], timeout=600) + except CommandFailedError: + pass + except: + self.log('Failed to stop {label}'.format(label=daemon.id_)) + + try: + # try to capture a core dump + daemon.signal(signal.SIGABRT) + except socket.error: + pass + raise + finally: + daemon.reset() + + for daemon in killed_daemons: + self.log('reviving {label}'.format(label=daemon.id_)) + daemon.start() + + for stat in stats: + self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat])) + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the cephfs-mirror by thrashing while another task/workunit + is running. + + Please refer to CephFSMirrorThrasher class for further information on the + available options. 
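+ The thrasher greenlet is appended to ctx.ceph[<cluster>].thrashers so the
+ DaemonWatchdog can observe it. On exit the task stops the thrasher, fails
+ the run if the thrasher recorded an exception, and then joins it.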
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'cephfs_mirror_thrash task only accepts a dict for configuration' + + cluster = config.get('cluster', 'ceph') + daemons = list(ctx.daemons.iter_daemons_of_role('cephfs-mirror', cluster)) + assert len(daemons) > 0, \ + 'cephfs_mirror_thrash task requires at least 1 cephfs-mirror daemon' + + # choose random seed + if 'seed' in config: + seed = int(config['seed']) + else: + seed = int(time.time()) + log.info('cephfs_mirror_thrash using random seed: {seed}'.format(seed=seed)) + random.seed(seed) + + thrasher = CephFSMirrorThrasher(ctx, config, cluster, daemons) + thrasher.start() + ctx.ceph[cluster].thrashers.append(thrasher) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining cephfs_mirror_thrash') + thrasher.stop() + if thrasher.exception is not None: + raise RuntimeError('error during thrashing') + thrasher.join() + log.info('done joining') diff --git a/qa/tasks/cephfs_test_runner.py b/qa/tasks/cephfs_test_runner.py new file mode 100644 index 000000000..8a4919b93 --- /dev/null +++ b/qa/tasks/cephfs_test_runner.py @@ -0,0 +1,213 @@ +import contextlib +import logging +import os +import unittest +from unittest import suite, loader, case +from teuthology.task import interactive +from teuthology import misc +from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster +from tasks.mgr.mgr_test_case import MgrCluster + +log = logging.getLogger(__name__) + + +class DecoratingLoader(loader.TestLoader): + """ + A specialization of TestLoader that tags some extra attributes + onto test classes as they are loaded. + """ + def __init__(self, params): + self._params = params + super(DecoratingLoader, self).__init__() + + def _apply_params(self, obj): + for k, v in self._params.items(): + if obj.__class__ is type: + cls = obj + else: + cls = obj.__class__ + setattr(cls, k, v) + + def loadTestsFromTestCase(self, testCaseClass): + self._apply_params(testCaseClass) + return super(DecoratingLoader, self).loadTestsFromTestCase(testCaseClass) + + def loadTestsFromName(self, name, module=None): + result = super(DecoratingLoader, self).loadTestsFromName(name, module) + + # Special case for when we were called with the name of a method, we get + # a suite with one TestCase + tests_in_result = list(result) + if len(tests_in_result) == 1 and isinstance(tests_in_result[0], case.TestCase): + self._apply_params(tests_in_result[0]) + + return result + + +class LogStream(object): + def __init__(self): + self.buffer = "" + + def write(self, data): + self.buffer += data + if "\n" in self.buffer: + lines = self.buffer.split("\n") + for line in lines[:-1]: + log.info(line) + self.buffer = lines[-1] + + def flush(self): + pass + + +class InteractiveFailureResult(unittest.TextTestResult): + """ + Specialization that implements interactive-on-error style + behavior. + """ + ctx = None + + def addFailure(self, test, err): + log.error(self._exc_info_to_string(err, test)) + log.error("Failure in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=self.ctx, config=None) + + def addError(self, test, err): + log.error(self._exc_info_to_string(err, test)) + log.error("Error in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=self.ctx, config=None) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the CephFS test cases. 
+ + Run everything in tasks/cephfs/test_*.py: + + :: + + tasks: + - install: + - ceph: + - ceph-fuse: + - cephfs_test_runner: + + `modules` argument allows running only some specific modules: + + :: + + tasks: + ... + - cephfs_test_runner: + modules: + - tasks.cephfs.test_sessionmap + - tasks.cephfs.test_auto_repair + + By default, any cases that can't be run on the current cluster configuration + will generate a failure. When the optional `fail_on_skip` argument is set + to false, any tests that can't be run on the current configuration will + simply be skipped: + + :: + tasks: + ... + - cephfs_test_runner: + fail_on_skip: false + + """ + + ceph_cluster = CephCluster(ctx) + + if len(list(misc.all_roles_of_type(ctx.cluster, 'mds'))): + mds_cluster = MDSCluster(ctx) + fs = Filesystem(ctx) + else: + mds_cluster = None + fs = None + + if len(list(misc.all_roles_of_type(ctx.cluster, 'mgr'))): + mgr_cluster = MgrCluster(ctx) + else: + mgr_cluster = None + + # Mount objects, sorted by ID + if hasattr(ctx, 'mounts'): + mounts = [v for k, v in sorted(ctx.mounts.items(), key=lambda mount: mount[0])] + else: + # The test configuration has a filesystem but no fuse/kclient mounts + mounts = [] + + decorating_loader = DecoratingLoader({ + "ctx": ctx, + "mounts": mounts, + "fs": fs, + "ceph_cluster": ceph_cluster, + "mds_cluster": mds_cluster, + "mgr_cluster": mgr_cluster, + }) + + fail_on_skip = config.get('fail_on_skip', True) + + # Put useful things onto ctx for interactive debugging + ctx.fs = fs + ctx.mds_cluster = mds_cluster + ctx.mgr_cluster = mgr_cluster + + # Depending on config, either load specific modules, or scan for moduless + if config and 'modules' in config and config['modules']: + module_suites = [] + for mod_name in config['modules']: + # Test names like cephfs.test_auto_repair + module_suites.append(decorating_loader.loadTestsFromName(mod_name)) + overall_suite = suite.TestSuite(module_suites) + else: + # Default, run all tests + overall_suite = decorating_loader.discover( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "cephfs/" + ) + ) + + if ctx.config.get("interactive-on-error", False): + InteractiveFailureResult.ctx = ctx + result_class = InteractiveFailureResult + else: + result_class = unittest.TextTestResult + + class LoggingResult(result_class): + def startTest(self, test): + log.info("Starting test: {0}".format(self.getDescription(test))) + return super(LoggingResult, self).startTest(test) + + def addSkip(self, test, reason): + if fail_on_skip: + # Don't just call addFailure because that requires a traceback + self.failures.append((test, reason)) + else: + super(LoggingResult, self).addSkip(test, reason) + + # Execute! + result = unittest.TextTestRunner( + stream=LogStream(), + resultclass=LoggingResult, + verbosity=2, + failfast=True).run(overall_suite) + + if not result.wasSuccessful(): + result.printErrors() # duplicate output at end for convenience + + bad_tests = [] + for test, error in result.errors: + bad_tests.append(str(test)) + for test, failure in result.failures: + bad_tests.append(str(test)) + + raise RuntimeError("Test failure: {0}".format(", ".join(bad_tests))) + + yield diff --git a/qa/tasks/cephfs_upgrade_snap.py b/qa/tasks/cephfs_upgrade_snap.py new file mode 100644 index 000000000..1b0a737a7 --- /dev/null +++ b/qa/tasks/cephfs_upgrade_snap.py @@ -0,0 +1,47 @@ +""" +Upgrade cluster snap format. 
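+
+The task (see below) requires max_mds == 1, runs a recursive repair scrub
+on / and ~mdsdir, then polls the MDS map until the CEPH_MDSMAP_ALLOW_SNAPS
+and CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS flags are both set.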
+""" + +import logging +import time + +from tasks.cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Upgrade CephFS file system snap format. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'snap-upgrade task only accepts a dict for configuration' + + fs = Filesystem(ctx) + + mds_map = fs.get_mds_map() + assert(mds_map['max_mds'] == 1) + + json = fs.run_scrub(["start", "/", "force", "recursive", "repair"]) + if not json or json['return_code'] == 0: + assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True) + log.info("scrub / completed") + else: + log.info("scrub / failed: {}".format(json)) + + json = fs.run_scrub(["start", "~mdsdir", "force", "recursive", "repair"]) + if not json or json['return_code'] == 0: + assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True) + log.info("scrub ~mdsdir completed") + else: + log.info("scrub / failed: {}".format(json)) + + for i in range(0, 10): + mds_map = fs.get_mds_map() + if (mds_map['flags'] & (1<<1)) != 0 and (mds_map['flags'] & (1<<4)) != 0: + break + time.sleep(10) + assert((mds_map['flags'] & (1<<1)) != 0) # Test CEPH_MDSMAP_ALLOW_SNAPS + assert((mds_map['flags'] & (1<<4)) != 0) # Test CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py new file mode 100644 index 000000000..40818f3f4 --- /dev/null +++ b/qa/tasks/check_counter.py @@ -0,0 +1,130 @@ + +import logging +import json + +from teuthology.task import Task +from teuthology import misc + +from tasks import ceph_manager + +log = logging.getLogger(__name__) + + +class CheckCounter(Task): + """ + Use this task to validate that some daemon perf counters were + incremented by the nested tasks. + + Config: + 'cluster_name': optional, specify which cluster + 'target': dictionary of daemon type to list of performance counters. + 'dry_run': just log the value of the counters, don't fail if they + aren't nonzero. + + Success condition is that for all of the named counters, at least + one of the daemons of that type has the counter nonzero. + + Example to check cephfs dirfrag splits are happening: + - install: + - ceph: + - ceph-fuse: + - check-counter: + counters: + mds: + - "mds.dir_split" + - + name: "mds.dir_update" + min: 3 + - workunit: ... + """ + @property + def admin_remote(self): + first_mon = misc.get_first_mon(self.ctx, None) + (result,) = self.ctx.cluster.only(first_mon).remotes.keys() + return result + + def start(self): + log.info("START") + + def end(self): + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('check-counter', {})) + + cluster_name = self.config.get('cluster_name', None) + dry_run = self.config.get('dry_run', False) + targets = self.config.get('counters', {}) + + if cluster_name is None: + cluster_name = next(iter(self.ctx.managers.keys())) + + + mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager')) + active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"] + + for daemon_type, counters in targets.items(): + # List of 'a', 'b', 'c'... 
+ daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type)) + daemons = dict([(daemon_id, + self.ctx.daemons.get_daemon(daemon_type, daemon_id)) + for daemon_id in daemon_ids]) + + expected = set() + seen = set() + + for daemon_id, daemon in daemons.items(): + if not daemon.running(): + log.info("Ignoring daemon {0}, it isn't running".format(daemon_id)) + continue + elif daemon_type == 'mgr' and daemon_id != active_mgr: + continue + else: + log.debug("Getting stats from {0}".format(daemon_id)) + + manager = self.ctx.managers[cluster_name] + proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"]) + response_data = proc.stdout.getvalue().strip() + if response_data: + perf_dump = json.loads(response_data) + else: + log.warning("No admin socket response from {0}, skipping".format(daemon_id)) + continue + + minval = '' + expected_val = '' + for counter in counters: + if isinstance(counter, dict): + name = counter['name'] + if 'min' in counter: + minval = counter['min'] + if 'expected_val' in counter: + expected_val = counter['expected_val'] + else: + name = counter + minval = 1 + expected.add(name) + + val = perf_dump + for key in name.split('.'): + if key not in val: + log.warning(f"Counter '{name}' not found on daemon {daemon_type}.{daemon_id}") + val = None + break + + val = val[key] + + if val is not None: + log.info(f"Daemon {daemon_type}.{daemon_id} {name}={val}") + if isinstance(minval, int) and val >= minval: + seen.add(name) + elif isinstance(expected_val, int) and val == expected_val: + seen.add(name) + + if not dry_run: + unseen = set(expected) - set(seen) + if unseen: + raise RuntimeError("The following counters failed to be set " + "on {0} daemons: {1}".format( + daemon_type, unseen + )) + +task = CheckCounter diff --git a/qa/tasks/cifs_mount.py b/qa/tasks/cifs_mount.py new file mode 100644 index 000000000..b282b0b7d --- /dev/null +++ b/qa/tasks/cifs_mount.py @@ -0,0 +1,137 @@ +""" +Mount cifs clients. Unmount when finished. +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a cifs client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. 
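+ A dict form is also accepted, mapping each client to per-client options;
+ currently the only option is 'share', the CIFS share name to mount (see
+ the last example below).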
+ + Example that starts smbd and mounts cifs on all nodes:: + + tasks: + - ceph: + - samba: + - cifs-mount: + - interactive: + + Example that splits smbd and cifs: + + tasks: + - ceph: + - samba: [samba.0] + - cifs-mount: [client.0] + - ceph-fuse: [client.1] + - interactive: + + Example that specifies the share name: + + tasks: + - ceph: + - ceph-fuse: + - samba: + samba.0: + cephfuse: "{testdir}/mnt.0" + - cifs-mount: + client.0: + share: cephfuse + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting cifs clients...') + + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) + + from .samba import get_sambas + samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')] + sambas = list(get_sambas(ctx=ctx, roles=samba_roles)) + (ip, _) = sambas[0][1].ssh.get_transport().getpeername() + log.info('samba ip: {ip}'.format(ip=ip)) + + for id_, remote in clients: + mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_)) + log.info('Mounting cifs client.{id} at {remote} {mnt}...'.format( + id=id_, remote=remote,mnt=mnt)) + + remote.run( + args=[ + 'mkdir', + '--', + mnt, + ], + ) + + rolestr = 'client.{id_}'.format(id_=id_) + unc = "ceph" + log.info("config: {c}".format(c=config)) + if config[rolestr] is not None and 'share' in config[rolestr]: + unc = config[rolestr]['share'] + + remote.run( + args=[ + 'sudo', + 'mount', + '-t', + 'cifs', + '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc), + '-o', + 'username=ubuntu,password=ubuntu', + mnt, + ], + ) + + remote.run( + args=[ + 'sudo', + 'chown', + 'ubuntu:ubuntu', + '{m}/'.format(m=mnt), + ], + ) + + try: + yield + finally: + log.info('Unmounting cifs clients...') + for id_, remote in clients: + remote.run( + args=[ + 'sudo', + 'umount', + mnt, + ], + ) + for id_, remote in clients: + while True: + try: + remote.run( + args=[ + 'rmdir', '--', mnt, + run.Raw('2>&1'), + run.Raw('|'), + 'grep', 'Device or resource busy', + ], + ) + import time + time.sleep(1) + except Exception: + break diff --git a/qa/tasks/cram.py b/qa/tasks/cram.py new file mode 100644 index 000000000..a445a146f --- /dev/null +++ b/qa/tasks/cram.py @@ -0,0 +1,160 @@ +""" +Cram tests +""" +import logging +import os + +from tasks.util.workunit import get_refspec_after_overrides + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run all cram tests from the specified paths on the specified + clients. Each client runs tests in parallel as default, and + you can also disable it by adding "parallel: False" option. + + Limitations: + Tests must have a .t suffix. Tests with duplicate names will + overwrite each other, so only the last one will run. 
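+ The tests are run with cram 0.6 installed into a virtualenv under the
+ test directory; failing tests leave their .t and .err files under
+ {testdir}/archive/cram.<client> for inspection.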
+ + For example:: + + tasks: + - ceph: + - cram: + clients: + client.0: + - qa/test.t + - qa/test2.t] + client.1: [qa/test.t] + branch: foo + parallel: False + + You can also run a list of cram tests on all clients:: + + tasks: + - ceph: + - cram: + clients: + all: [qa/test.t] + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict) + assert 'clients' in config and isinstance(config['clients'], dict), \ + 'configuration must contain a dictionary of clients' + + clients = teuthology.replace_all_with_clients(ctx.cluster, + config['clients']) + testdir = teuthology.get_testdir(ctx) + + overrides = ctx.config.get('overrides', {}) + refspec = get_refspec_after_overrides(config, overrides) + + _parallel = config.get('parallel', True) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + log.info('Pulling tests from %s ref %s', git_url, refspec) + + try: + for client, tests in clients.items(): + (remote,) = (ctx.cluster.only(client).remotes.keys()) + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + remote.run( + args=[ + 'mkdir', '--', client_dir, + run.Raw('&&'), + 'python3', '-m', 'venv', '{tdir}/virtualenv'.format(tdir=testdir), + run.Raw('&&'), + '{tdir}/virtualenv/bin/pip'.format(tdir=testdir), + 'install', 'cram==0.6', + ], + ) + clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client) + remote.run(args=refspec.clone(git_url, clone_dir)) + + for test in tests: + assert test.endswith('.t'), 'tests must end in .t' + remote.run( + args=[ + 'cp', '--', os.path.join(clone_dir, test), client_dir, + ], + ) + + if _parallel: + with parallel() as p: + for role in clients.keys(): + p.spawn(_run_tests, ctx, role) + else: + for role in clients.keys(): + _run_tests(ctx, role) + finally: + for client, tests in clients.items(): + (remote,) = (ctx.cluster.only(client).remotes.keys()) + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + test_files = set([test.rsplit('/', 1)[1] for test in tests]) + + # remove test files unless they failed + for test_file in test_files: + abs_file = os.path.join(client_dir, test_file) + remote.run( + args=[ + 'test', '-f', abs_file + '.err', + run.Raw('||'), + 'rm', '-f', '--', abs_file, + ], + ) + + # ignore failure since more than one client may + # be run on a host, and the client dir should be + # non-empty if the test failed + clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client) + remote.run( + args=[ + 'rm', '-rf', '--', + '{tdir}/virtualenv'.format(tdir=testdir), + clone_dir, + run.Raw(';'), + 'rmdir', '--ignore-fail-on-non-empty', client_dir, + ], + ) + +def _run_tests(ctx, role): + """ + For each role, check to make sure it's a client, then run the cram on that client + + :param ctx: Context + :param role: Roles + """ + assert isinstance(role, str) + PREFIX = 'client.' 
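+ # roles are normally of the form 'client.<id>'; strip the prefix so
+ # that, e.g., 'client.0' exports CEPH_ID="0" to the cram tests below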
+ if role.startswith(PREFIX): + id_ = role[len(PREFIX):] + else: + id_ = role + (remote,) = (ctx.cluster.only(role).remotes.keys()) + ceph_ref = ctx.summary.get('ceph-sha1', 'master') + + testdir = teuthology.get_testdir(ctx) + log.info('Running tests for %s...', role) + remote.run( + args=[ + run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)), + run.Raw('CEPH_ID="{id}"'.format(id=id_)), + run.Raw('PATH=$PATH:/usr/sbin'), + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '{tdir}/virtualenv/bin/cram'.format(tdir=testdir), + '-v', '--', + run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)), + ], + logger=log.getChild(role), + ) diff --git a/qa/tasks/create_verify_lfn_objects.py b/qa/tasks/create_verify_lfn_objects.py new file mode 100644 index 000000000..532541581 --- /dev/null +++ b/qa/tasks/create_verify_lfn_objects.py @@ -0,0 +1,83 @@ +""" +Rados modle-based integration tests +""" +import contextlib +import logging + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + For each combination of namespace and name_length, create + <num_objects> objects with name length <name_length> + on entry. On exit, verify that the objects still exist, can + be deleted, and then don't exist. + + Usage:: + + create_verify_lfn_objects.py: + pool: <pool_name> default: 'data' + prefix: <prefix> default: '' + namespace: [<namespace>] default: [''] + num_objects: [<num_objects>] default: 10 + name_length: [<name_length>] default: [400] + """ + pool = config.get('pool', 'data') + num_objects = config.get('num_objects', 10) + name_length = config.get('name_length', [400]) + namespace = config.get('namespace', [None]) + prefix = config.get('prefix', None) + manager = ctx.managers['ceph'] + + objects = [] + for l in name_length: + for ns in namespace: + def object_name(i): + nslength = 0 + if namespace != '': + nslength = len(namespace) + numstr = str(i) + fillerlen = l - nslength - len(prefix) - len(numstr) + assert fillerlen >= 0 + return prefix + ('a'*fillerlen) + numstr + objects += [(ns, object_name(i)) for i in range(num_objects)] + + for ns, name in objects: + err = manager.do_put( + pool, + name, + '/etc/resolv.conf', + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + try: + yield + finally: + log.info('ceph_verify_lfn_objects verifying...') + for ns, name in objects: + err = manager.do_get( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + log.info('ceph_verify_lfn_objects deleting...') + for ns, name in objects: + err = manager.do_rm( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + log.info('ceph_verify_lfn_objects verifying absent...') + for ns, name in objects: + err = manager.do_get( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err != 0 diff --git a/qa/tasks/daemonwatchdog.py b/qa/tasks/daemonwatchdog.py new file mode 100644 index 000000000..c8fa9f3c2 --- /dev/null +++ b/qa/tasks/daemonwatchdog.py @@ -0,0 +1,128 @@ +import logging +import signal +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event + +log = logging.getLogger(__name__) + +class DaemonWatchdog(Greenlet): + """ + DaemonWatchdog:: + + Watch Ceph daemons for failures. If an extended failure is detected (i.e. + not intentional), then the watchdog will unmount file systems and send + SIGTERM to all daemons. 
The duration of an extended failure is configurable + with watchdog_daemon_timeout. + + ceph: + watchdog: + daemon_restart [default: no]: restart daemon if "normal" exit (status==0). + + daemon_timeout [default: 300]: number of seconds a daemon + is allowed to be failed before the + watchdog will bark. + """ + + def __init__(self, ctx, config, thrashers): + super(DaemonWatchdog, self).__init__() + self.config = ctx.config.get('watchdog', {}) + self.ctx = ctx + self.e = None + self.logger = log.getChild('daemon_watchdog') + self.cluster = config.get('cluster', 'ceph') + self.name = 'watchdog' + self.stopping = Event() + self.thrashers = thrashers + + def _run(self): + try: + self.watch() + except Exception as e: + # See _run exception comment for MDSThrasher + self.e = e + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... + + def log(self, x): + """Write data to logger""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def bark(self): + self.log("BARK! unmounting mounts and killing all daemons") + if hasattr(self.ctx, 'mounts'): + for mount in self.ctx.mounts.values(): + try: + mount.umount_wait(force=True) + except: + self.logger.exception("ignoring exception:") + daemons = [] + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('osd', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('rgw', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mgr', cluster=self.cluster))) + + for daemon in daemons: + try: + daemon.signal(signal.SIGTERM) + except: + self.logger.exception("ignoring exception:") + + def watch(self): + self.log("watchdog starting") + daemon_timeout = int(self.config.get('daemon_timeout', 300)) + daemon_restart = self.config.get('daemon_restart', False) + daemon_failure_time = {} + while not self.stopping.is_set(): + bark = False + now = time.time() + + osds = self.ctx.daemons.iter_daemons_of_role('osd', cluster=self.cluster) + mons = self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.cluster) + mdss = self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.cluster) + rgws = self.ctx.daemons.iter_daemons_of_role('rgw', cluster=self.cluster) + mgrs = self.ctx.daemons.iter_daemons_of_role('mgr', cluster=self.cluster) + + daemon_failures = [] + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, osds)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mons)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mdss)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, rgws)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mgrs)) + + for daemon in daemon_failures: + name = daemon.role + '.' 
+ daemon.id_ + dt = daemon_failure_time.setdefault(name, (daemon, now)) + assert dt[0] is daemon + delta = now-dt[1] + self.log("daemon {name} is failed for ~{t:.0f}s".format(name=name, t=delta)) + if delta > daemon_timeout: + bark = True + if daemon_restart == 'normal' and daemon.proc.exitstatus == 0: + self.log(f"attempting to restart daemon {name}") + daemon.restart() + + # If a daemon is no longer failed, remove it from tracking: + for name in list(daemon_failure_time.keys()): + if name not in [d.role + '.' + d.id_ for d in daemon_failures]: + self.log("daemon {name} has been restored".format(name=name)) + del daemon_failure_time[name] + + for thrasher in self.thrashers: + if thrasher.exception is not None: + self.log("{name} failed".format(name=thrasher.name)) + bark = True + + if bark: + self.bark() + return + + sleep(5) + + self.log("watchdog finished") diff --git a/qa/tasks/deduplication.py b/qa/tasks/deduplication.py new file mode 100644 index 000000000..d4cdfbf57 --- /dev/null +++ b/qa/tasks/deduplication.py @@ -0,0 +1,220 @@ +""" +Run ceph-dedup-tool +""" +import contextlib +import logging +import gevent +from teuthology import misc as teuthology +import json +import time +from io import StringIO + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run ceph-dedup-tool. + The config should be as follows:: + ceph-dedup-tool: + clients: [client list] + op: <operation name> + pool: <pool name> + chunk_pool: <chunk pool name> + chunk_size: <chunk size> + chunk_algorithm: <chunk algorithm, fixed|fastcdc> + fingerprint_algorithm: <fingerprint algorithm, sha1|sha256|sha512> + chunk_dedup_threashold: <the number of duplicate chunks to trigger chunk dedup> + max_thread: <the number of threads> + wakeup_period: <duration> + For example:: + tasks: + - exec: + client.0: + - sudo ceph osd pool create low_tier 4 + - deduplication: + clients: [client.0] + op: 'sample-dedup' + pool: 'default.rgw.buckets.data' + chunk_pool: 'low_tier' + chunk_size: 131072 + chunk_algorithm: 'fastcdc' + fingerprint_algorithm: 'sha1' + chunk_dedup_threshold: 5 + max_thread: 2 + wakeup_period: 20 + sampling_ratio: 100 + """ + log.info('Beginning deduplication...') + assert isinstance(config, dict), \ + "please list clients to run on" + + args = [ + 'ceph-dedup-tool'] + if config.get('op', None): + args.extend(['--op', config.get('op', None)]) + if config.get('chunk_pool', None): + args.extend(['--chunk-pool', config.get('chunk_pool', None)]) + if config.get('chunk_size', False): + args.extend(['--chunk-size', str(config.get('chunk_size', 131072))]) + if config.get('chunk_algorithm', False): + args.extend(['--chunk-algorithm', config.get('chunk_algorithm', None)] ) + if config.get('fingerprint_algorithm', False): + args.extend(['--fingerprint-algorithm', config.get('fingerprint_algorithm', None)] ) + if config.get('chunk_dedup_threshold', False): + args.extend(['--chunk-dedup-threshold', str(config.get('chunk_dedup_threshold', 1))]) + if config.get('max_thread', False): + args.extend(['--max-thread', str(config.get('max_thread', 2))]) + if config.get('sampling_ratio', False): + args.extend(['--sampling-ratio', str(config.get('sampling_ratio', 100))]) + if config.get('wakeup_period', False): + args.extend(['--wakeup-period', str(config.get('wakeup_period', 20))]) + if config.get('pool', False): + args.extend(['--pool', config.get('pool', None)]) + + args.extend([ + '--debug', + '--daemon', + '--loop']) + + def thread(): + run_remote(args, False, 0) + + def run_remote(args, 
need_wait, client_num): + clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + log.info('clients are %s' % clients) + role = 'client.{id}'.format(id=client_num) + if role not in clients: + raise Exception('wrong client {c}'.format(c=role)) + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + testdir = teuthology.get_testdir(ctx) + cmd_args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir)] + cmd_args.extend(args) + log.info("cmd: %s", cmd_args) + tries = 0 + while True: + (remote,) = ctx.cluster.only(role).remotes.keys() + proc = remote.run( + args=cmd_args, + wait=need_wait, check_status=False, + stdout=StringIO(), + ) + log.info('exitstatus {r}'.format(r=proc.exitstatus)) + if proc.exitstatus == 0 or need_wait == False: + log.info('proc stdout ', proc.stdout.getvalue()) + return proc.stdout.getvalue().strip() + tries += 1 + if tries > 30: + raise Exception('timed out getting correct exitstatus') + time.sleep(30) + + def get_chunk_objs(chunk_pool): + chunk_obj_list = run_remote(('rados ls -p ' + chunk_pool).split(), True, 1).split() + if chunk_obj_list == False: + return None + else: + return chunk_obj_list + + def get_ref_list(chunk_pool, chunk_obj): + # get reference list of chunk object + dump_str = run_remote( + ('ceph-dedup-tool --op dump-chunk-refs --chunk-pool ' + + chunk_pool + ' --object ' + chunk_obj).split(), + True, 1 + ) + # fail in case that reference object is not written + assert len(dump_str) > 0 + log.info('{0} obj has {1} refs' + .format(chunk_obj, json.loads(dump_str)['count'])) + + # check if chunk object's reference object exists in base-tier + ref_list = json.loads(dump_str)['refs'] + return ref_list + + # To validate whether the sample-dedup operation works well, this function checks if + # 1. sample-dedup has been started and + # 2. reference of chunk objects' exists in correct base pool + def validate(): + log.info('start validating sample-dedup') + base_pool = config.get('pool', None) + chunk_pool = config.get('chunk_pool', None) + max_validation_cnt = 15 + retry_cnt = 0 + # chunk objs for re-validation after chunk-repair + retry_chunk_objs = list() + + # check whether sample-dedup has been started + chunk_obj_list = get_chunk_objs(chunk_pool) + while (chunk_obj_list == None or len(chunk_obj_list) == 0) and retry_cnt < max_validation_cnt: + # retry getting # chunk objs after 30 secs of sleep + time.sleep(30) + chunk_obj_list = get_chunk_objs(chunk_pool) + retry_cnt += 1 + log.info('chunk pool empty. 
retry ', retry_cnt) + assert retry_cnt < max_validation_cnt + + log.info('sample-dedup started successfully') + + retry_cnt = 0 + max_validation_cnt = 5 + # validate chunk pool for max_validation_cnt times + while retry_cnt < max_validation_cnt: + for chunk_obj in chunk_obj_list: + ref_list = get_ref_list(chunk_pool, chunk_obj) + for ref in ref_list: + ret = run_remote( + ('rados -p ' + base_pool + ' stat ' + ref['oid']) + .split(), True, 1 + ) + # check if ref exists in base pool + if ret == False or len(ret) == 0: + # if ref not exists in base pool, try repair in order to avoid + # false-positive inconsistent reference + ret = run_remote(('ceph osd pool stats ' + base_pool).split(), True, 1) + assert len(ret) > 0 + base_pool_id = ret.split()[3] + ret = run_remote( + ('ceph-dedup-tool --op chunk-repair --chunk-pool ' + + chunk_pool + ' --object ' + chunk_obj + ' --target-ref ' + + ref['oid'] + ' --target-ref-pool-id ' + base_pool_id) + .split(), True, 1 + ) + retry_chunk_objs.append(chunk_obj) + log.info('{0} obj exists in {1}'.format(ref['oid'], base_pool)) + + # retry validation for repaired objects + for chunk_obj in retry_chunk_objs: + ref_list = get_ref_list(chunk_pool, chunk_obj) + for ref in ref_list: + ret = run_remote( + ('rados -p ' + base_pool + ' stat ' + ref['oid']) + .split(), True, 1 + ) + assert len(ret) > 0 + log.info( + '{0} obj exists in {1} after repair'.format(ref['oid'], + base_pool) + ) + retry_chunk_objs = list() + + # get chunk objects for the next loop + chunk_obj_list = get_chunk_objs(chunk_pool) + retry_cnt += 1 + time.sleep(30) + return True + + + running = gevent.spawn(thread) + checker = gevent.spawn(validate) + + try: + yield + finally: + log.info('joining ceph-dedup-tool') + running.get() + checker.get() diff --git a/qa/tasks/devstack.py b/qa/tasks/devstack.py new file mode 100644 index 000000000..2499e9e53 --- /dev/null +++ b/qa/tasks/devstack.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python +import contextlib +import logging +import textwrap +import time +from configparser import ConfigParser +from io import BytesIO, StringIO + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.contextutil import nested + +log = logging.getLogger(__name__) + +DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git' +DS_STABLE_BRANCHES = ("havana", "grizzly") + +is_devstack_node = lambda role: role.startswith('devstack') +is_osd_node = lambda role: role.startswith('osd') + + +@contextlib.contextmanager +def task(ctx, config): + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + with nested(lambda: install(ctx=ctx, config=config), + lambda: smoke(ctx=ctx, config=config), + ): + yield + + +@contextlib.contextmanager +def install(ctx, config): + """ + Install OpenStack DevStack and configure it to use a Ceph cluster for + Glance and Cinder. + + Requires one node with a role 'devstack' + + Since devstack runs rampant on the system it's used on, typically you will + want to reprovision that machine after using devstack on it. + + Also, the default 2GB of RAM that is given to vps nodes is insufficient. I + recommend 4GB. 
Downburst can be instructed to give 4GB to a vps node by + adding this to the yaml: + + downburst: + ram: 4G + + This was created using documentation found here: + https://github.com/openstack-dev/devstack/blob/master/README.md + http://docs.ceph.com/en/latest/rbd/rbd-openstack/ + """ + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys())) + + devstack_branch = config.get("branch", "master") + install_devstack(devstack_node, devstack_branch) + try: + configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node) + yield + finally: + pass + + +def install_devstack(devstack_node, branch="master"): + log.info("Cloning DevStack repo...") + + args = ['git', 'clone', DEVSTACK_GIT_REPO] + devstack_node.run(args=args) + + if branch != "master": + if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"): + branch = "stable/" + branch + log.info("Checking out {branch} branch...".format(branch=branch)) + cmd = "cd devstack && git checkout " + branch + devstack_node.run(args=cmd) + + log.info("Installing DevStack...") + args = ['cd', 'devstack', run.Raw('&&'), './stack.sh'] + devstack_node.run(args=args) + + +def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node): + pool_size = config.get('pool_size', '128') + create_pools(ceph_node, pool_size) + distribute_ceph_conf(devstack_node, ceph_node) + # This is where we would install python-ceph and ceph-common but it appears + # the ceph task does that for us. + generate_ceph_keys(ceph_node) + distribute_ceph_keys(devstack_node, ceph_node) + secret_uuid = set_libvirt_secret(devstack_node, ceph_node) + update_devstack_config_files(devstack_node, secret_uuid) + set_apache_servername(devstack_node) + # Rebooting is the most-often-used method of restarting devstack services + misc.reboot(devstack_node) + start_devstack(devstack_node) + restart_apache(devstack_node) + + +def create_pools(ceph_node, pool_size): + log.info("Creating pools on Ceph cluster...") + + for pool_name in ['volumes', 'images', 'backups']: + args = ['sudo', 'ceph', 'osd', 'pool', 'create', pool_name, pool_size] + ceph_node.run(args=args) + + +def distribute_ceph_conf(devstack_node, ceph_node): + log.info("Copying ceph.conf to DevStack node...") + + ceph_conf_path = '/etc/ceph/ceph.conf' + ceph_conf = ceph_node.read_file(ceph_conf_path, sudo=True) + devstack_node.write_file(ceph_conf_path, ceph_conf, sudo=True) + + +def generate_ceph_keys(ceph_node): + log.info("Generating Ceph keys...") + + ceph_auth_cmds = [ + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'], # noqa + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.glance', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'], # noqa + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'], # noqa + ] + for cmd in ceph_auth_cmds: + ceph_node.run(args=cmd) + + +def distribute_ceph_keys(devstack_node, ceph_node): + log.info("Copying Ceph keys to DevStack node...") + + def copy_key(from_remote, key_name, to_remote, dest_path, owner): + key_stringio = BytesIO() + from_remote.run( + args=['sudo', 'ceph', 'auth', 
'get-or-create', key_name], + stdout=key_stringio) + key_stringio.seek(0) + to_remote.write_file(dest_path, key_stringio, owner=owner, sudo=True) + keys = [ + dict(name='client.glance', + path='/etc/ceph/ceph.client.glance.keyring', + # devstack appears to just want root:root + #owner='glance:glance', + ), + dict(name='client.cinder', + path='/etc/ceph/ceph.client.cinder.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + dict(name='client.cinder-backup', + path='/etc/ceph/ceph.client.cinder-backup.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + ] + for key_dict in keys: + copy_key(ceph_node, key_dict['name'], devstack_node, + key_dict['path'], key_dict.get('owner')) + + +def set_libvirt_secret(devstack_node, ceph_node): + log.info("Setting libvirt secret...") + + cinder_key = ceph_node.sh('sudo ceph auth get-key client.cinder').strip() + uuid = devstack_node.sh('uuidgen').strip() + + secret_path = '/tmp/secret.xml' + secret_template = textwrap.dedent(""" + <secret ephemeral='no' private='no'> + <uuid>{uuid}</uuid> + <usage type='ceph'> + <name>client.cinder secret</name> + </usage> + </secret>""") + secret_data = secret_template.format(uuid=uuid) + devstack_node.write_file(secret_path, secret_data) + devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file', + secret_path]) + devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret', + uuid, '--base64', cinder_key]) + return uuid + + +def update_devstack_config_files(devstack_node, secret_uuid): + log.info("Updating DevStack config files to use Ceph...") + + def backup_config(node, file_name, backup_ext='.orig.teuth'): + node.run(args=['cp', '-f', file_name, file_name + backup_ext]) + + def update_config(config_name, config_stream, update_dict, + section='DEFAULT'): + parser = ConfigParser() + parser.read_file(config_stream) + for (key, value) in update_dict.items(): + parser.set(section, key, value) + out_stream = StringIO() + parser.write(out_stream) + out_stream.seek(0) + return out_stream + + updates = [ + dict(name='/etc/glance/glance-api.conf', options=dict( + default_store='rbd', + rbd_store_user='glance', + rbd_store_pool='images', + show_image_direct_url='True',)), + dict(name='/etc/cinder/cinder.conf', options=dict( + volume_driver='cinder.volume.drivers.rbd.RBDDriver', + rbd_pool='volumes', + rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_flatten_volume_from_snapshot='false', + rbd_max_clone_depth='5', + glance_api_version='2', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + backup_driver='cinder.backup.drivers.ceph', + backup_ceph_conf='/etc/ceph/ceph.conf', + backup_ceph_user='cinder-backup', + backup_ceph_chunk_size='134217728', + backup_ceph_pool='backups', + backup_ceph_stripe_unit='0', + backup_ceph_stripe_count='0', + restore_discard_excess_bytes='true', + )), + dict(name='/etc/nova/nova.conf', options=dict( + libvirt_images_type='rbd', + libvirt_images_rbd_pool='volumes', + libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + libvirt_inject_password='false', + libvirt_inject_key='false', + libvirt_inject_partition='-2', + )), + ] + + for update in updates: + file_name = update['name'] + options = update['options'] + config_data = devstack_node.read_file(file_name, sudo=True) + config_stream = StringIO(config_data) + backup_config(devstack_node, file_name) + new_config_stream = update_config(file_name, config_stream, options) + devstack_node.write_file(file_name, 
new_config_stream, sudo=True) + + +def set_apache_servername(node): + # Apache complains: "Could not reliably determine the server's fully + # qualified domain name, using 127.0.0.1 for ServerName" + # So, let's make sure it knows its name. + log.info("Setting Apache ServerName...") + + hostname = node.hostname + config_file = '/etc/apache2/conf.d/servername' + config_data = "ServerName {name}".format(name=hostname) + node.write_file(config_file, config_data, sudo=True) + + +def start_devstack(devstack_node): + log.info("Patching devstack start script...") + # This causes screen to start headless - otherwise rejoin-stack.sh fails + # because there is no terminal attached. + cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh" + devstack_node.run(args=cmd) + + log.info("Starting devstack...") + cmd = "cd devstack && ./rejoin-stack.sh" + devstack_node.run(args=cmd) + + # This was added because I was getting timeouts on Cinder requests - which + # were trying to access Keystone on port 5000. A more robust way to handle + # this would be to introduce a wait-loop on devstack_node that checks to + # see if a service is listening on port 5000. + log.info("Waiting 30s for devstack to start...") + time.sleep(30) + + +def restart_apache(node): + node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True) + + +@contextlib.contextmanager +def exercise(ctx, config): + log.info("Running devstack exercises...") + + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + + # TODO: save the log *and* preserve failures + #devstack_archive_dir = create_devstack_archive(ctx, devstack_node) + + try: + #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format( # noqa + # dir=devstack_archive_dir) + cmd = "cd devstack && ./exercise.sh" + devstack_node.run(args=cmd, wait=True) + yield + finally: + pass + + +def create_devstack_archive(ctx, devstack_node): + test_dir = misc.get_testdir(ctx) + devstack_archive_dir = "{test_dir}/archive/devstack".format( + test_dir=test_dir) + devstack_node.run(args="mkdir -p " + devstack_archive_dir) + return devstack_archive_dir + + +@contextlib.contextmanager +def smoke(ctx, config): + log.info("Running a basic smoketest...") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys())) + + try: + create_volume(devstack_node, an_osd_node, 'smoke0', 1) + yield + finally: + pass + + +def create_volume(devstack_node, ceph_node, vol_name, size): + """ + :param size: The size of the volume, in GB + """ + size = str(size) + log.info("Creating a {size}GB volume named {name}...".format( + name=vol_name, + size=size)) + args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create', + '--display-name', vol_name, size] + cinder_create = devstack_node.sh(args, wait=True) + vol_info = parse_os_table(cinder_create) + log.debug("Volume info: %s", str(vol_info)) + + try: + rbd_output = ceph_node.sh("rbd --id cinder ls -l volumes", wait=True) + except run.CommandFailedError: + log.debug("Original rbd call failed; retrying without '--id cinder'") + rbd_output = ceph_node.sh("rbd ls -l volumes", wait=True) + + assert vol_info['id'] in rbd_output, \ + "Volume not found on Ceph cluster" + assert vol_info['size'] == size, \ + "Volume size on Ceph cluster is different than specified" + return vol_info['id'] + + +def 
parse_os_table(table_str): + out_dict = dict() + for line in table_str.split('\n'): + if line.startswith('|'): + items = line.split() + out_dict[items[1]] = items[3] + return out_dict diff --git a/qa/tasks/die_on_err.py b/qa/tasks/die_on_err.py new file mode 100644 index 000000000..a6aa4c632 --- /dev/null +++ b/qa/tasks/die_on_err.py @@ -0,0 +1,70 @@ +""" +Raise exceptions on osd coredumps or test err directories +""" +import contextlib +import logging +import time +from teuthology.orchestra import run + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Die if {testdir}/err exists or if an OSD dumps core + """ + if config is None: + config = {} + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + testdir = teuthology.get_testdir(ctx) + + while True: + for i in range(num_osds): + (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.keys() + p = osd_remote.run( + args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d has an error" % i) + raise Exception("osd %d error" % i) + + log_path = '/var/log/ceph/osd.%d.log' % (i) + + p = osd_remote.run( + args = [ + 'tail', '-1', log_path, + run.Raw('|'), + 'grep', '-q', 'end dump' + ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d dumped core" % i) + raise Exception("osd %d dumped core" % i) + + time.sleep(5) diff --git a/qa/tasks/divergent_priors.py b/qa/tasks/divergent_priors.py new file mode 100644 index 000000000..e000bb2bb --- /dev/null +++ b/qa/tasks/divergent_priors.py @@ -0,0 +1,160 @@ +""" +Special case divergence test +""" +import logging +import time + +from teuthology import misc as teuthology +from tasks.util.rados import rados + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
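+
+ In outline (see the code below): write 100 objects to a one-PG pool,
+ blackhole the two non-primary osds, perform a few divergent writes and
+ removes through the primary, kill all osds and bring back only the
+ non-divergent ones, write a newer object so the primary's old log
+ entries become divergent, then revive the primary and verify that all
+ the divergent objects are readable once recovery is allowed.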
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + 
manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + + time.sleep(20) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + log.info("success") diff --git a/qa/tasks/divergent_priors2.py b/qa/tasks/divergent_priors2.py new file mode 100644 index 000000000..4d4b07fc4 --- /dev/null +++ b/qa/tasks/divergent_priors2.py @@ -0,0 +1,192 @@ +""" +Special case divergence test with ceph-objectstore-tool export/remove/import +""" +import logging +import time + +from teuthology.exceptions import CommandFailedError +from teuthology import misc as teuthology +from tasks.util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail and a ceph-objectstore-tool export/import + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + 
for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + + # Export a pg + (exp_remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + FSPATH = manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + cmd = ((prefix + "--op import --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight']) + time.sleep(20); + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + cmd = 'rm {file}'.format(file=expfile) + exp_remote.run(args=cmd, wait=True) + log.info("success") diff --git a/qa/tasks/dnsmasq.py b/qa/tasks/dnsmasq.py new file mode 100644 index 000000000..df8ccecb1 --- /dev/null +++ b/qa/tasks/dnsmasq.py @@ -0,0 +1,170 @@ +""" +Task for dnsmasq configuration +""" +import contextlib +import logging + +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology import contextutil +from teuthology import packaging +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def install_dnsmasq(remote): + """ + If dnsmasq is not installed, install it for the duration of the task. 
+ """ + try: + existing = packaging.get_package_version(remote, 'dnsmasq') + except: + existing = None + + if existing is None: + packaging.install_package('dnsmasq', remote) + try: + yield + finally: + if existing is None: + packaging.remove_package('dnsmasq', remote) + +@contextlib.contextmanager +def backup_resolv(remote, path): + """ + Store a backup of resolv.conf in the testdir and restore it after the task. + """ + remote.run(args=['cp', '/etc/resolv.conf', path]) + try: + yield + finally: + # restore with 'cp' to avoid overwriting its security context + remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf']) + remote.run(args=['rm', path]) + +@contextlib.contextmanager +def replace_resolv(remote, path): + """ + Update resolv.conf to point the nameserver at localhost. + """ + remote.write_file(path, "nameserver 127.0.0.1\n") + try: + # install it + if remote.os.package_type == "rpm": + # for centos ovh resolv.conf has immutable attribute set + remote.run(args=['sudo', 'chattr', '-i', '/etc/resolv.conf'], check_status=False) + remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf']) + yield + finally: + remote.run(args=['rm', path]) + +@contextlib.contextmanager +def setup_dnsmasq(remote, testdir, cnames): + """ configure dnsmasq on the given remote, adding each cname given """ + log.info('Configuring dnsmasq on remote %s..', remote.name) + + # add address entries for each cname + dnsmasq = "server=8.8.8.8\nserver=8.8.4.4\n" + address_template = "address=/{cname}/{ip_address}\n" + for cname, ip_address in cnames.items(): + dnsmasq += address_template.format(cname=cname, ip_address=ip_address) + + # write to temporary dnsmasq file + dnsmasq_tmp = '/'.join((testdir, 'ceph.tmp')) + remote.write_file(dnsmasq_tmp, dnsmasq) + + # move into /etc/dnsmasq.d/ + dnsmasq_path = '/etc/dnsmasq.d/ceph' + remote.run(args=['sudo', 'mv', dnsmasq_tmp, dnsmasq_path]) + # restore selinux context if necessary + remote.run(args=['sudo', 'restorecon', dnsmasq_path], check_status=False) + + # restart dnsmasq + remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq']) + # verify dns name is set + remote.run(args=['ping', '-c', '4', next(iter(cnames.keys()))]) + + try: + yield + finally: + log.info('Removing dnsmasq configuration from remote %s..', remote.name) + # remove /etc/dnsmasq.d/ceph + remote.run(args=['sudo', 'rm', dnsmasq_path]) + # restart dnsmasq + remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq']) + +@contextlib.contextmanager +def task(ctx, config): + """ + Configures dnsmasq to add cnames for teuthology remotes. The task expects a + dictionary, where each key is a role. If all cnames for that role use the + same address as that role, the cnames can be given as a list. For example, + this entry configures dnsmasq on the remote associated with client.0, adding + two cnames for the ip address associated with client.0: + + - dnsmasq: + client.0: + - client0.example.com + - c0.example.com + + If the addresses do not all match the given role, a dictionary can be given + to specify the ip address by its target role. For example: + + - dnsmasq: + client.0: + client.0.example.com: client.0 + client.1.example.com: client.1 + + Cnames that end with a . are treated as prefix for the existing hostname. + For example, if the remote for client.0 has a hostname of 'example.com', + this task will add cnames for dev.example.com and test.example.com: + + - dnsmasq: + client.0: [dev., test.] 
+ """ + # apply overrides + overrides = config.get('overrides', {}) + misc.deep_merge(config, overrides.get('dnsmasq', {})) + + # multiple roles may map to the same remote, so collect names by remote + remote_names = {} + for role, cnames in config.items(): + remote = get_remote_for_role(ctx, role) + if remote is None: + raise ConfigError('no remote for role %s' % role) + + names = remote_names.get(remote, {}) + + if isinstance(cnames, list): + # when given a list of cnames, point to local ip + for cname in cnames: + if cname.endswith('.'): + cname += remote.hostname + names[cname] = remote.ip_address + elif isinstance(cnames, dict): + # when given a dict, look up the remote ip for each + for cname, client in cnames.items(): + r = get_remote_for_role(ctx, client) + if r is None: + raise ConfigError('no remote for role %s' % client) + if cname.endswith('.'): + cname += r.hostname + names[cname] = r.ip_address + + remote_names[remote] = names + + testdir = misc.get_testdir(ctx) + resolv_bak = '/'.join((testdir, 'resolv.bak')) + resolv_tmp = '/'.join((testdir, 'resolv.tmp')) + + # run subtasks for each unique remote + subtasks = [] + for remote, cnames in remote_names.items(): + subtasks.extend([ lambda r=remote: install_dnsmasq(r) ]) + subtasks.extend([ lambda r=remote: backup_resolv(r, resolv_bak) ]) + subtasks.extend([ lambda r=remote: replace_resolv(r, resolv_tmp) ]) + subtasks.extend([ lambda r=remote, cn=cnames: setup_dnsmasq(r, testdir, cn) ]) + + with contextutil.nested(*subtasks): + yield diff --git a/qa/tasks/dump_stuck.py b/qa/tasks/dump_stuck.py new file mode 100644 index 000000000..4971f1916 --- /dev/null +++ b/qa/tasks/dump_stuck.py @@ -0,0 +1,161 @@ +""" +Dump_stuck command +""" +import logging +import time + +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10): + """ + Do checks. Make sure get_stuck_pgs return the right amount of information, then + extract health information from the raw_cluster_cmd and compare the results with + values passed in. This passes if all asserts pass. + + :param num_manager: Ceph manager + :param num_inactive: number of inaactive pages that are stuck + :param num_unclean: number of unclean pages that are stuck + :param num_stale: number of stale pages that are stuck + :param timeout: timeout value for get_stuck_pgs calls + """ + inactive = manager.get_stuck_pgs('inactive', timeout) + unclean = manager.get_stuck_pgs('unclean', timeout) + stale = manager.get_stuck_pgs('stale', timeout) + log.info('inactive %s / %d, unclean %s / %d, stale %s / %d', + len(inactive), num_inactive, + len(unclean), num_unclean, + len(stale), num_stale) + assert len(inactive) == num_inactive + assert len(unclean) == num_unclean + assert len(stale) == num_stale + +def task(ctx, config): + """ + Test the dump_stuck command. 
+ + :param ctx: Context + :param config: Configuration + """ + assert config is None, \ + 'dump_stuck requires no configuration' + assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \ + 'dump_stuck requires exactly 2 osds' + + timeout = 60 + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_clean(timeout) + + manager.raw_cluster_cmd('tell', 'mon.a', 'injectargs', '--', +# '--mon-osd-report-timeout 90', + '--mon-pg-stuck-threshold 10') + + # all active+clean + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + num_pgs = manager.get_num_pgs() + + manager.mark_out_osd(0) + time.sleep(timeout) + manager.flush_pg_stats([1]) + manager.wait_for_recovery(timeout) + + # all active+clean+remapped + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + + manager.mark_in_osd(0) + manager.flush_pg_stats([0, 1]) + manager.wait_for_clean(timeout) + + # all active+clean + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + + log.info('stopping first osd') + manager.kill_osd(0) + manager.mark_down_osd(0) + manager.wait_for_active(timeout) + + log.info('waiting for all to be unclean') + starttime = time.time() + done = False + while not done: + try: + check_stuck( + manager, + num_inactive=0, + num_unclean=num_pgs, + num_stale=0, + ) + done = True + except AssertionError: + # wait up to 15 minutes to become stale + if time.time() - starttime > 900: + raise + + + log.info('stopping second osd') + manager.kill_osd(1) + manager.mark_down_osd(1) + + log.info('waiting for all to be stale') + starttime = time.time() + done = False + while not done: + try: + check_stuck( + manager, + num_inactive=0, + num_unclean=num_pgs, + num_stale=num_pgs, + ) + done = True + except AssertionError: + # wait up to 15 minutes to become stale + if time.time() - starttime > 900: + raise + + log.info('reviving') + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): + manager.revive_osd(id_) + manager.mark_in_osd(id_) + while True: + try: + manager.flush_pg_stats([0, 1]) + break + except Exception: + log.exception('osds must not be started yet, waiting...') + time.sleep(1) + manager.wait_for_clean(timeout) + + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) diff --git a/qa/tasks/ec_inconsistent_hinfo.py b/qa/tasks/ec_inconsistent_hinfo.py new file mode 100644 index 000000000..fa10f2c45 --- /dev/null +++ b/qa/tasks/ec_inconsistent_hinfo.py @@ -0,0 +1,225 @@ +""" +Inconsistent_hinfo +""" +import logging +import time +from dateutil.parser import parse +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def wait_for_deep_scrub_complete(manager, pgid, check_time_now, inconsistent): + log.debug("waiting for pg %s deep-scrub complete (check_time_now=%s)" % + (pgid, check_time_now)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg); + assert pg + + last_deep_scrub_time = parse(pg['last_deep_scrub_stamp']).strftime('%s') + if last_deep_scrub_time < check_time_now: + log.debug('not scrubbed') + continue + + status = pg['state'].split('+') + if inconsistent: + 
assert 'inconsistent' in status + else: + assert 'inconsistent' not in status + return + + assert False, 'not scrubbed' + + +def wait_for_backfilling_complete(manager, pgid, from_osd, to_osd): + log.debug("waiting for pg %s backfill from osd.%s to osd.%s complete" % + (pgid, from_osd, to_osd)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg); + assert pg + status = pg['state'].split('+') + if 'active' not in status: + log.debug('not active') + continue + if 'backfilling' in status: + assert from_osd in pg['acting'] and to_osd in pg['up'] + log.debug('backfilling') + continue + if to_osd not in pg['up']: + log.debug('backfill not started yet') + continue + log.debug('backfilled!') + break + +def task(ctx, config): + """ + Test handling of objects with inconsistent hash info during backfill and deep-scrub. + + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'ec_inconsistent_hinfo task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'backfill_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + pg_num=1, + erasure_code_profile_name=profile_name, + min_size=2) + manager.raw_cluster_cmd('osd', 'pool', 'set', pool, + 'pg_autoscale_mode', 'off') + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + pool_id = manager.get_pool_num(pool) + pgid = '%d.0' % pool_id + pgs = manager.get_pg_stats() + acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) + log.info("acting=%s" % acting) + assert acting + primary = acting[0] + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + log.debug("create test object") + obj = 'test' + rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile]) + + victim = acting[1] + + log.info("remove test object hash info from osd.%s shard and test deep-scrub and repair" + % victim) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=victim) + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) + wait_for_deep_scrub_complete(manager, pgid, check_time_now, True) + + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'repair', pgid) + wait_for_deep_scrub_complete(manager, pgid, check_time_now, False) + + log.info("remove test object hash info from primary osd.%s shard and test backfill" + % primary) + + log.debug("write some data") + rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096', + '--no-cleanup']) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + + # mark the osd out to trigger a rebalance/backfill + source = acting[1] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.mark_out_osd(source) + + # wait for everything to 
peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + source, target = target, source + log.info("remove test object hash info from non-primary osd.%s shard and test backfill" + % source) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd in to trigger a rebalance/backfill + manager.mark_in_osd(target) + + # wait for everything to peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + log.info("remove hash info from two shards and test backfill") + + source = acting[2] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd out to trigger a rebalance/backfill + manager.mark_out_osd(source) + + # wait for everything to peer, backfill and detect unfound object + wait_for_backfilling_complete(manager, pgid, source, target) + + # verify that there is unfound object + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'backfill_unfound' in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 1 + m = manager.list_pg_unfound(pgid) + log.debug('list_pg_unfound=%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + # mark stuff lost + pgs = manager.get_pg_stats() + manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete') + + # wait for everything to peer and be happy... + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_recovery() diff --git a/qa/tasks/ec_lost_unfound.py b/qa/tasks/ec_lost_unfound.py new file mode 100644 index 000000000..57a9364ec --- /dev/null +++ b/qa/tasks/ec_lost_unfound.py @@ -0,0 +1,159 @@ +""" +Lost_unfound +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects on an ec pool. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.wait_for_clean() + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '2', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'lost_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + erasure_code_profile_name=profile_name, + min_size=2) + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + manager.kill_osd(3) + manager.mark_down_osd(3) + + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + + # take out osd.1 and a necessary shard of those objects. 
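For orientation before the next step: with the k=2/m=2 profile used here, an object needs at least k=2 surviving shards to be readable. The objects written while osd.0 and osd.3 were down only have up-to-date shards on the two remaining OSDs, so losing osd.1 as well leaves a single current shard and those objects become unfound. A toy check of that arithmetic (purely illustrative, not part of the task):

    # Illustrative only: an EC object is recoverable iff at least k shards survive.
    def ec_recoverable(k, shards_written, shards_lost):
        return (shards_written - shards_lost) >= k

    assert ec_recoverable(k=2, shards_written=4, shards_lost=2)      # old objects
    assert not ec_recoverable(k=2, shards_written=2, shards_lost=1)  # newly written objects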
+ manager.kill_osd(1) + manager.mark_down_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + manager.revive_osd(0) + manager.wait_till_osd_is_up(0) + manager.revive_osd(3) + manager.wait_till_osd_is_up(3) + + manager.flush_pg_stats([0, 2, 3]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2, 3]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , pool, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + log.info("reverting unfound in %s", pg['pgid']) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2, 3]) + manager.wait_for_recovery() + + if not config.get('parallel_bench', True): + time.sleep(20) + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() diff --git a/qa/tasks/exec_on_cleanup.py b/qa/tasks/exec_on_cleanup.py new file mode 100644 index 000000000..5a630781a --- /dev/null +++ b/qa/tasks/exec_on_cleanup.py @@ -0,0 +1,61 @@ +""" +Exececute custom commands during unwind/cleanup +""" +import logging +import contextlib + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute commands on a given role + + tasks: + - ceph: + - kclient: [client.a] + - exec: + client.a: + - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control" + - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control" + - interactive: + + It stops and fails with the first command that does not return on success. It means + that if the first command fails, the second won't run at all. + + To avoid confusion it is recommended to explicitly enclose the commands in + double quotes. For instance if the command is false (without double quotes) it will + be interpreted as a boolean by the YAML parser. 
+ + :param ctx: Context + :param config: Configuration + """ + try: + yield + finally: + log.info('Executing custom commands...') + assert isinstance(config, dict), "task exec got invalid config" + + testdir = teuthology.get_testdir(ctx) + + if 'all' in config and len(config) == 1: + a = config['all'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles) + + for role, ls in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + for c in ls: + c.replace('$TESTDIR', testdir) + remote.run( + args=[ + 'sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'bash', + '-c', + c], + ) + diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py new file mode 100644 index 000000000..7e62c8031 --- /dev/null +++ b/qa/tasks/fs.py @@ -0,0 +1,167 @@ +""" +CephFS sub-tasks. +""" + +import logging +import re + +from tasks.cephfs.filesystem import Filesystem, MDSCluster + +log = logging.getLogger(__name__) + +# Everything up to CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1<<5) +CEPH_MDSMAP_NOT_JOINABLE = (1 << 0) +CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST<<1) - 1) +def pre_upgrade_save(ctx, config): + """ + That the upgrade procedure doesn't clobber state: save state. + """ + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + state = {} + ctx['mds-upgrade-state'] = state + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = {} + fs_state['epoch'] = mdsmap['epoch'] + fs_state['max_mds'] = mdsmap['max_mds'] + fs_state['flags'] = mdsmap['flags'] & UPGRADE_FLAGS_MASK + state[fscid] = fs_state + log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + +def post_upgrade_checks(ctx, config): + """ + That the upgrade procedure doesn't clobber state. 
+ """ + + state = ctx['mds-upgrade-state'] + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = state[fscid] + log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + # check state was restored to previous values + assert fs_state['max_mds'] == mdsmap['max_mds'] + assert fs_state['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK) + + # now confirm that the upgrade procedure was followed + epoch = mdsmap['epoch'] + pre_upgrade_epoch = fs_state['epoch'] + assert pre_upgrade_epoch < epoch + multiple_max_mds = fs_state['max_mds'] > 1 + did_decrease_max_mds = False + should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY + did_disable_allow_standby_replay = False + did_fail_fs = False + for i in range(pre_upgrade_epoch+1, mdsmap['epoch']): + old_status = mdsc.status(epoch=i) + old_fs = old_status.get_fsmap(fscid) + old_mdsmap = old_fs['mdsmap'] + if not multiple_max_mds \ + and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE): + raise RuntimeError('mgr is failing fs when there is only one ' + f'rank in epoch {i}.') + if multiple_max_mds \ + and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) \ + and old_mdsmap['max_mds'] == 1: + raise RuntimeError('mgr is failing fs as well the max_mds ' + f'is reduced in epoch {i}') + if old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE: + log.debug(f"max_mds not reduced in epoch {i} as fs was failed " + "for carrying out rapid multi-rank mds upgrade") + did_fail_fs = True + if multiple_max_mds and old_mdsmap['max_mds'] == 1: + log.debug(f"max_mds reduced in epoch {i}") + did_decrease_max_mds = True + if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY): + log.debug(f"allow_standby_replay disabled in epoch {i}") + did_disable_allow_standby_replay = True + assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds + assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay + + +def ready(ctx, config): + """ + That the file system is ready for clients. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + timeout = config.get('timeout', 300) + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + for filesystem in status.get_filesystems(): + fs = Filesystem(ctx, fscid=filesystem['id']) + fs.wait_for_daemons(timeout=timeout, status=status) + +def clients_evicted(ctx, config): + """ + Check clients are evicted, unmount (cleanup) if so. 
+ """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + clients = config.get('clients') + + if clients is None: + clients = {("client."+client_id): True for client_id in ctx.mounts} + + log.info("clients is {}".format(str(clients))) + + fs = Filesystem(ctx) + status = fs.status() + + has_session = set() + mounts = {} + for client in clients: + client_id = re.match("^client.([0-9]+)$", client).groups(1)[0] + mounts[client] = ctx.mounts.get(client_id) + + for rank in fs.get_ranks(status=status): + ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status) + for session in ls: + for client, evicted in clients.items(): + mount = mounts.get(client) + if mount is not None: + global_id = mount.get_global_id() + if session['id'] == global_id: + if evicted: + raise RuntimeError("client still has session: {}".format(str(session))) + else: + log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank'])) + has_session.add(client) + + no_session = set(clients) - has_session + should_assert = False + for client, evicted in clients.items(): + mount = mounts.get(client) + if mount is not None: + if evicted: + log.info("confirming client {} is blocklisted".format(client)) + assert fs.is_addr_blocklisted(mount.get_global_addr()) + elif client in no_session: + log.info("client {} should not be evicted but has no session with an MDS".format(client)) + fs.is_addr_blocklisted(mount.get_global_addr()) # for debugging + should_assert = True + if should_assert: + raise RuntimeError("some clients which should not be evicted have no session with an MDS?") diff --git a/qa/tasks/fwd_scrub.py b/qa/tasks/fwd_scrub.py new file mode 100644 index 000000000..c1e0059cd --- /dev/null +++ b/qa/tasks/fwd_scrub.py @@ -0,0 +1,165 @@ +""" +Thrash mds by simulating failures +""" +import logging +import contextlib + +from gevent import sleep, GreenletExit +from gevent.greenlet import Greenlet +from gevent.event import Event +from teuthology import misc as teuthology + +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +class ForwardScrubber(Thrasher, Greenlet): + """ + ForwardScrubber:: + + The ForwardScrubber does forward scrubbing of file-systems during execution + of other tasks (workunits, etc). + """ + + def __init__(self, fs, scrub_timeout=300, sleep_between_iterations=1): + super(ForwardScrubber, self).__init__() + + self.logger = log.getChild('fs.[{f}]'.format(f=fs.name)) + self.fs = fs + self.name = 'thrasher.fs.[{f}]'.format(f=fs.name) + self.stopping = Event() + self.scrub_timeout = scrub_timeout + self.sleep_between_iterations = sleep_between_iterations + + def _run(self): + try: + self.do_scrub() + except Exception as e: + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... 
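Because ForwardScrubber is a gevent Greenlet wrapped in the Thrasher interface, the task below drives it with a plain start/stop/join lifecycle. A minimal sketch of that lifecycle, assuming fs is an existing tasks.cephfs.filesystem.Filesystem instance (the surrounding workload is elided):

    # Sketch of how the scrubber is driven; mirrors task() further down.
    scrubber = ForwardScrubber(fs, scrub_timeout=300, sleep_between_iterations=1)
    scrubber.start()        # spawns the greenlet, which loops in do_scrub()
    # ... run the workload being scrubbed underneath ...
    scrubber.stop()         # sets the Event that do_scrub() polls
    scrubber.join()
    if scrubber.exception is not None:
        raise RuntimeError(f"error during scrub thrashing: {scrubber.exception}")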
+ + def stop(self): + self.stopping.set() + + def do_scrub(self): + """ + Perform the file-system scrubbing + """ + self.logger.info(f'start scrubbing fs: {self.fs.name}') + + try: + while not self.stopping.is_set(): + self._scrub() + sleep(self.sleep_between_iterations) + except GreenletExit: + pass + + self.logger.info(f'end scrubbing fs: {self.fs.name}') + + def _scrub(self, path="/", recursive=True): + self.logger.info(f"scrubbing fs: {self.fs.name}") + scrubopts = ["force"] + if recursive: + scrubopts.append("recursive") + out_json = self.fs.run_scrub(["start", path, ",".join(scrubopts)]) + assert out_json is not None + + tag = out_json['scrub_tag'] + + assert tag is not None + assert out_json['return_code'] == 0 + assert out_json['mode'] == 'asynchronous' + + done = self.fs.wait_until_scrub_complete(tag=tag, sleep=30, timeout=self.scrub_timeout) + if not done: + raise RuntimeError('scrub timeout') + self._check_damage() + + def _check_damage(self): + rdmg = self.fs.get_damage() + types = set() + for rank, dmg in rdmg.items(): + if dmg: + for d in dmg: + types.add(d['damage_type']) + log.error(f"rank {rank} damaged:\n{dmg}") + if types: + raise RuntimeError(f"rank damage found: {types}") + +def stop_all_fwd_scrubbers(thrashers): + for thrasher in thrashers: + if not isinstance(thrasher, ForwardScrubber): + continue + thrasher.stop() + thrasher.join() + if thrasher.exception is not None: + raise RuntimeError(f"error during scrub thrashing: {thrasher.exception}") + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the mds by running scrub iterations while another task/workunit + is running. + Example config: + + - fwd_scrub: + scrub_timeout: 300 + sleep_between_iterations: 1 + """ + + mds_cluster = MDSCluster(ctx) + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'fwd_scrub task only accepts a dict for configuration' + mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + assert len(mdslist) > 0, \ + 'fwd_scrub task requires at least 1 metadata server' + + (first,) = ctx.cluster.only(f'mds.{mdslist[0]}').remotes.keys() + manager = ceph_manager.CephManager( + first, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # make sure everyone is in active, standby, or standby-replay + log.info('Wait for all MDSs to reach steady state...') + status = mds_cluster.status() + while True: + steady = True + for info in status.get_all(): + state = info['state'] + if state not in ('up:active', 'up:standby', 'up:standby-replay'): + steady = False + break + if steady: + break + sleep(2) + status = mds_cluster.status() + + log.info('Ready to start scrub thrashing') + + manager.wait_for_clean() + assert manager.is_clean() + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + for fs in status.get_filesystems(): + fwd_scrubber = ForwardScrubber(Filesystem(ctx, fscid=fs['id']), + config['scrub_timeout'], + config['sleep_between_iterations']) + fwd_scrubber.start() + ctx.ceph[config['cluster']].thrashers.append(fwd_scrubber) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining ForwardScrubbers') + stop_all_fwd_scrubbers(ctx.ceph[config['cluster']].thrashers) + log.info('done joining') diff --git a/qa/tasks/immutable_object_cache.py b/qa/tasks/immutable_object_cache.py new file mode 100644 index 000000000..b8034de47 --- /dev/null +++ b/qa/tasks/immutable_object_cache.py @@ -0,0 +1,72 @@ +""" +immutable object cache task +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from 
teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def immutable_object_cache(ctx, config): + """ + setup and cleanup immutable object cache + """ + log.info("start immutable object cache daemon") + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + # make sure that there is one immutable object cache daemon on the same node. + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + remote.run( + args=[ + 'ceph-immutable-object-cache', '-b', + ] + ) + try: + yield + finally: + log.info("check and cleanup immutable object cache") + for client, client_config in config.items(): + client_config = client_config if client_config is not None else dict() + (remote,) = ctx.cluster.only(client).remotes.keys() + cache_path = client_config.get('immutable object cache path', '/tmp/ceph-immutable-object-cache') + ls_command = '"$(ls {} )"'.format(cache_path) + remote.run( + args=[ + 'test', '-n', run.Raw(ls_command), + ] + ) + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + remote.run( + args=[ + 'sudo', 'rm', '-rf', cache_path, run.Raw('||'), 'true', + ] + ) + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for start immutable_object_cache. + """ + assert isinstance(config, dict), \ + "task immutable_object_cache only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: immutable_object_cache(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/immutable_object_cache_thrash.py b/qa/tasks/immutable_object_cache_thrash.py new file mode 100644 index 000000000..0bf3ad3a0 --- /dev/null +++ b/qa/tasks/immutable_object_cache_thrash.py @@ -0,0 +1,79 @@ +""" +immutable object cache thrash task +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +DEFAULT_KILL_DAEMON_TIME = 2 +DEFAULT_DEAD_TIME = 30 +DEFAULT_LIVE_TIME = 120 + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def thrashes_immutable_object_cache_daemon(ctx, config): + """ + thrashes immutable object cache daemon. + It can test reconnection feature of RO cache when RO daemon crash + TODO : replace sleep with better method. + """ + log.info("thrashes immutable object cache daemon") + + # just thrash one rbd client. 
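Both cache tasks lean on the same teuthology idiom for their kill/cleanup steps: shell operators are passed through run.Raw so they are not quoted away, and '|| true' keeps killall from failing the run when no daemon happens to be up. A minimal, self-contained sketch of that idiom (remote is assumed to be a teuthology orchestra remote):

    from teuthology.orchestra import run

    # run.Raw('||') keeps the operator as shell syntax instead of a quoted
    # argument; 'true' swallows the error when the daemon is not running.
    remote.run(
        args=['sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache',
              run.Raw('||'), 'true'],
    )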
+ client, client_config = list(config.items())[0] + (remote,) = ctx.cluster.only(client).remotes.keys() + client_config = client_config if client_config is not None else dict() + kill_daemon_time = client_config.get('kill_daemon_time', DEFAULT_KILL_DAEMON_TIME) + dead_time = client_config.get('dead_time', DEFAULT_DEAD_TIME) + live_time = client_config.get('live_time', DEFAULT_LIVE_TIME) + + for i in range(kill_daemon_time): + log.info("ceph-immutable-object-cache crash....") + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + # librbd shoud normally run when ceph-immutable-object-cache + remote.run( + args=[ + 'sleep', '{dead_time}'.format(dead_time=dead_time), + ] + ) + # librbd should reconnect daemon + log.info("startup ceph-immutable-object-cache") + remote.run( + args=[ + 'ceph-immutable-object-cache', '-b', + ] + ) + remote.run( + args=[ + 'sleep', '{live_time}'.format(live_time=live_time), + ] + ) + try: + yield + finally: + log.info("cleanup") + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for testing immutable_object_cache thrash. + """ + assert isinstance(config, dict), \ + "task immutable_object_cache_thrash only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: thrashes_immutable_object_cache_daemon(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/kafka.py b/qa/tasks/kafka.py new file mode 100644 index 000000000..48bf3611f --- /dev/null +++ b/qa/tasks/kafka.py @@ -0,0 +1,204 @@ +""" +Deploy and configure Kafka for Teuthology +""" +import contextlib +import logging +import time + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def get_kafka_version(config): + for client, client_config in config.items(): + if 'kafka_version' in client_config: + kafka_version = client_config.get('kafka_version') + return kafka_version + +def get_kafka_dir(ctx, config): + kafka_version = get_kafka_version(config) + current_version = 'kafka-' + kafka_version + '-src' + return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version) + + +@contextlib.contextmanager +def install_kafka(ctx, config): + """ + Downloading the kafka tar file. 
+ """ + assert isinstance(config, dict) + log.info('Installing Kafka...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + current_version = get_kafka_version(config) + + link1 = 'https://archive.apache.org/dist/kafka/' + current_version + '/kafka-' + current_version + '-src.tgz' + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'wget', link1], + ) + + file1 = 'kafka-' + current_version + '-src.tgz' + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'tar', '-xvzf', file1], + ) + + try: + yield + finally: + log.info('Removing packaged dependencies of Kafka...') + test_dir=get_kafka_dir(ctx, config) + current_version = get_kafka_version(config) + for (client,_) in config.items(): + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}/logs'.format(tdir=test_dir)], + ) + + ctx.cluster.only(client).run( + args=['rm', '-rf', test_dir], + ) + + rmfile1 = 'kafka-' + current_version + '-src.tgz' + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}/{doc}'.format(tdir=teuthology.get_testdir(ctx),doc=rmfile1)], + ) + + +@contextlib.contextmanager +def run_kafka(ctx,config): + """ + This includes two parts: + 1. Starting Zookeeper service + 2. Starting Kafka service + """ + assert isinstance(config, dict) + log.info('Bringing up Zookeeper and Kafka services...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './gradlew', 'jar', + '-PscalaVersion=2.13.2' + ], + ) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './zookeeper-server-start.sh', + '{tir}/config/zookeeper.properties'.format(tir=get_kafka_dir(ctx, config)), + run.Raw('&'), 'exit' + ], + ) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-server-start.sh', + '{tir}/config/server.properties'.format(tir=get_kafka_dir(ctx, config)), + run.Raw('&'), 'exit' + ], + ) + + try: + yield + finally: + log.info('Stopping Zookeeper and Kafka Services...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-server-stop.sh', + '{tir}/config/kafka.properties'.format(tir=get_kafka_dir(ctx, config)), + ], + ) + + time.sleep(5) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './zookeeper-server-stop.sh', + '{tir}/config/zookeeper.properties'.format(tir=get_kafka_dir(ctx, config)), + ], + ) + + time.sleep(5) + + ctx.cluster.only(client).run(args=['killall', '-9', 'java']) + + +@contextlib.contextmanager +def run_admin_cmds(ctx,config): + """ + Running Kafka Admin commands in order to check the working of producer anf consumer and creation of topic. 
+ """ + assert isinstance(config, dict) + log.info('Checking kafka server through producer/consumer commands...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-topics.sh', '--create', '--topic', 'quickstart-events', + '--bootstrap-server', 'localhost:9092' + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + 'echo', "First", run.Raw('|'), + './kafka-console-producer.sh', '--topic', 'quickstart-events', + '--bootstrap-server', 'localhost:9092' + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-console-consumer.sh', '--topic', 'quickstart-events', + '--from-beginning', + '--bootstrap-server', 'localhost:9092', + run.Raw('&'), 'exit' + ], + ) + + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx,config): + """ + Following is the way how to run kafka:: + tasks: + - kafka: + client.0: + kafka_version: 2.6.0 + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task kafka only supports a list or dictionary for configuration" + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Kafka config is %s', config) + + with contextutil.nested( + lambda: install_kafka(ctx=ctx, config=config), + lambda: run_kafka(ctx=ctx, config=config), + lambda: run_admin_cmds(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py new file mode 100644 index 000000000..ca202df71 --- /dev/null +++ b/qa/tasks/kclient.py @@ -0,0 +1,144 @@ +""" +Mount/unmount a ``kernel`` client. +""" +import contextlib +import logging + +from teuthology.misc import deep_merge +from teuthology.exceptions import CommandFailedError +from teuthology import misc +from teuthology.contextutil import MaxWhileTries +from tasks.cephfs.kernel_mount import KernelMount + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a ``kernel`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. 
+ + ``brxnet`` should be a Private IPv4 Address range, default range is + [192.168.0.0/16] + + Example that mounts all clients:: + + tasks: + - ceph: + - kclient: + - interactive: + - brxnet: [192.168.0.0/16] + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + + Pass a dictionary instead of lists to specify per-client config: + + tasks: + -kclient: + client.0: + debug: true + mntopts: ["nowsync"] + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting kernel clients...') + + if config is None: + ids = misc.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = filter(lambda x: 'client.' in x, config.keys()) + else: + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") + + clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) + + test_dir = misc.get_testdir(ctx) + + for id_, remote in clients: + KernelMount.cleanup_stale_netnses_and_bridge(remote) + + mounts = {} + overrides = ctx.config.get('overrides', {}).get('kclient', {}) + top_overrides = dict(filter(lambda x: 'client.' not in x[0], overrides.items())) + for id_, remote in clients: + entity = f"client.{id_}" + client_config = config.get(entity) + if client_config is None: + client_config = {} + # top level overrides + deep_merge(client_config, top_overrides) + # mount specific overrides + client_config_overrides = overrides.get(entity) + deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + + cephfs_name = client_config.get("cephfs_name") + if config.get("disabled", False) or not client_config.get('mounted', True): + continue + + kernel_mount = KernelMount( + ctx=ctx, + test_dir=test_dir, + client_id=id_, + client_remote=remote, + brxnet=ctx.teuthology_config.get('brxnet', None), + client_config=client_config, + cephfs_name=cephfs_name) + + mounts[id_] = kernel_mount + + if client_config.get('debug', False): + remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + + kernel_mount.mount(mntopts=client_config.get('mntopts', [])) + + def umount_all(): + log.info('Unmounting kernel clients...') + + forced = False + for mount in mounts.values(): + if mount.is_mounted(): + try: + mount.umount() + except (CommandFailedError, MaxWhileTries): + log.warning("Ordinary umount failed, forcing...") + forced = True + mount.umount_wait(force=True) + + for id_, remote in clients: + KernelMount.cleanup_stale_netnses_and_bridge(remote) + + return forced + + ctx.mounts = mounts + try: + yield mounts + except: + umount_all() # ignore forced retval, we are already in error handling + finally: + + forced = umount_all() + if forced: + # The context managers within the kclient manager worked (i.e. + # the test workload passed) but for some reason we couldn't + # umount, so turn this into a test failure. 
+ raise RuntimeError("Kernel mounts did not umount cleanly") diff --git a/qa/tasks/keycloak.py b/qa/tasks/keycloak.py new file mode 100644 index 000000000..1d89a27a5 --- /dev/null +++ b/qa/tasks/keycloak.py @@ -0,0 +1,468 @@ +""" +Deploy and configure Keycloak for Teuthology +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + +def get_keycloak_version(config): + for client, client_config in config.items(): + if 'keycloak_version' in client_config: + keycloak_version = client_config.get('keycloak_version') + return keycloak_version + +def get_keycloak_dir(ctx, config): + keycloak_version = get_keycloak_version(config) + current_version = 'keycloak-'+keycloak_version + return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version) + +def run_in_keycloak_dir(ctx, client, config, args, **kwargs): + return ctx.cluster.only(client).run( + args=[ 'cd', get_keycloak_dir(ctx,config), run.Raw('&&'), ] + args, + **kwargs + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Downloading the two required tar files + 1. Keycloak + 2. Wildfly (Application Server) + """ + assert isinstance(config, dict) + log.info('Installing packages for Keycloak...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + current_version = get_keycloak_version(config) + link1 = 'https://downloads.jboss.org/keycloak/'+current_version+'/keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['wget', link1]) + + file1 = 'keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', test_dir, '-xvzf', file1]) + + link2 ='https://downloads.jboss.org/keycloak/'+current_version+'/adapters/keycloak-oidc/keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['cd', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), 'wget', link2]) + + file2 = 'keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), '-xvzf', '{tdr}/{file}'.format(tdr=get_keycloak_dir(ctx,config),file=file2)]) + + try: + yield + finally: + log.info('Removing packaged dependencies of Keycloak...') + for client in config: + current_version = get_keycloak_version(config) + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), 'rm', '-rf', 'keycloak-wildfly-adapter-dist-' + current_version + '.tar.gz'], + ) + + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config))], + ) + +@contextlib.contextmanager +def download_conf(ctx, config): + """ + Downloads confi.py used in run_admin_cmds + """ + assert isinstance(config, dict) + log.info('Downloading conf...') + testdir = teuthology.get_testdir(ctx) + conf_branch = 'main' + conf_repo = 'https://github.com/TRYTOBE8TME/scripts.git' + for (client, _) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', conf_branch, + conf_repo, + '{tdir}/scripts'.format(tdir=testdir), + ], + ) + try: + 
yield + finally: + log.info('Removing conf...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/scripts'.format(tdir=testdir), + ], + ) + +@contextlib.contextmanager +def build(ctx,config): + """ + Build process which needs to be done before starting a server. + """ + assert isinstance(config, dict) + log.info('Building Keycloak...') + for (client,_) in config.items(): + run_in_keycloak_dir(ctx, client, config,['cd', 'bin', run.Raw('&&'), './jboss-cli.sh', '--file=adapter-elytron-install-offline.cli']) + try: + yield + finally: + pass + +@contextlib.contextmanager +def run_keycloak(ctx,config): + """ + This includes two parts: + 1. Adding a user to keycloak which is actually used to log in when we start the server and check in browser. + 2. Starting the server. + """ + assert isinstance(config, dict) + log.info('Bringing up Keycloak...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + '{tdir}/bin/add-user-keycloak.sh'.format(tdir=get_keycloak_dir(ctx,config)), + '-r', 'master', + '-u', 'admin', + '-p', 'admin', + ], + ) + + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './standalone.sh', run.Raw('&'), 'exit']) + try: + yield + finally: + log.info('Stopping Keycloak Server...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './jboss-cli.sh', '--connect', 'command=:shutdown']) + +@contextlib.contextmanager +def run_admin_cmds(ctx,config): + """ + Running Keycloak Admin commands(kcadm commands) in order to get the token, aud value, thumbprint and realm name. 
+ """ + assert isinstance(config, dict) + log.info('Running admin commands...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'config', 'credentials', + '--server', 'http://localhost:8080/auth', + '--realm', 'master', + '--user', 'admin', + '--password', 'admin', + '--client', 'admin-cli', + ], + ) + + realm_name='demorealm' + realm='realm={}'.format(realm_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'realms', + '-s', realm, + '-s', 'enabled=true', + '-s', 'accessTokenLifespan=1800', + '-o', + ], + ) + + client_name='my_client' + client='clientId={}'.format(client_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'clients', + '-r', realm_name, + '-s', client, + '-s', 'directAccessGrantsEnabled=true', + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans1= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', 'clients', + '-r', realm_name, + '-F', 'id,clientId', run.Raw('|'), + 'jq', '-r', '.[] | select (.clientId == "my_client") | .id' + ]) + + pre0=ans1.rstrip() + pre1="clients/{}".format(pre0) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'update', pre1, + '-r', realm_name, + '-s', 'enabled=true', + '-s', 'serviceAccountsEnabled=true', + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans2= pre1+'/client-secret' + + out2= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', ans2, + '-r', realm_name, + '-F', 'value' + ]) + + ans0= '{client}:{secret}'.format(client=client_name,secret=out2[15:51]) + ans3= 'client_secret={}'.format(out2[15:51]) + clientid='client_id={}'.format(client_name) + + proto_map = pre1+"/protocol-mappers/models" + uname = "username=testuser" + upass = "password=testuser" + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'users', + '-s', uname, + '-s', 'enabled=true', + '-s', 'attributes.\"https://aws.amazon.com/tags\"=\"{"principal_tags":{"Department":["Engineering", "Marketing"]}}\"', + '-r', realm_name, + ], + ) + + sample = 'testuser' + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'set-password', + '-r', realm_name, + '--username', sample, + '--new-password', sample, + ], + ) + + file_path = '{tdir}/scripts/confi.py'.format(tdir=teuthology.get_testdir(ctx)) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', proto_map, + '-r', realm_name, + '-f', file_path, + ], + ) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'config', 'credentials', + '--server', 'http://localhost:8080/auth', + '--realm', realm_name, + '--user', sample, + '--password', sample, + '--client', 'admin-cli', + ], + ) + + out9= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-H', 'Content-Type:application/x-www-form-urlencoded', + '-d', 'scope=openid', + '-d', 'grant_type=password', + '-d', clientid, + '-d', ans3, + '-d', uname, + '-d', upass, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token', run.Raw('|'), + 'jq', '-r', '.access_token' + ]) + + 
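The curl pipelines immediately above and below are plain OIDC token requests against Keycloak (password grant for the user token, client_credentials for the service token), with jq extracting .access_token. An equivalent sketch in Python for readers less comfortable with the shell form (the requests library is an assumption here; the task itself only uses curl through toxvenv_sh):

    import requests

    # Hypothetical equivalent of the token requests above; realm, client id
    # and secret mirror the values configured earlier in run_admin_cmds().
    def fetch_token(realm, client_id, client_secret, username=None, password=None):
        url = ('http://localhost:8080/auth/realms/%s'
               '/protocol/openid-connect/token' % realm)
        data = {'scope': 'openid',
                'client_id': client_id,
                'client_secret': client_secret}
        if username is None:
            data['grant_type'] = 'client_credentials'
        else:
            data.update(grant_type='password',
                        username=username, password=password)
        resp = requests.post(url, data=data)
        resp.raise_for_status()
        return resp.json()['access_token']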
user_token_pre = out9.rstrip() + user_token = '{}'.format(user_token_pre) + + out3= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-H', 'Content-Type:application/x-www-form-urlencoded', + '-d', 'scope=openid', + '-d', 'grant_type=client_credentials', + '-d', clientid, + '-d', ans3, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token', run.Raw('|'), + 'jq', '-r', '.access_token' + ]) + + pre2=out3.rstrip() + acc_token= 'token={}'.format(pre2) + ans4= '{}'.format(pre2) + + out4= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'GET', + '-H', 'Content-Type:application/x-www-form-urlencoded', + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/certs', run.Raw('|'), + 'jq', '-r', '.keys[].x5c[]' + ]) + + pre3=out4.rstrip() + cert_value='{}'.format(pre3) + start_value= "-----BEGIN CERTIFICATE-----\n" + end_value= "\n-----END CERTIFICATE-----" + user_data="" + user_data+=start_value + user_data+=cert_value + user_data+=end_value + + remote.write_file( + path='{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + data=user_data + ) + + out5= toxvenv_sh(ctx, remote, + [ + 'openssl', 'x509', + '-in', '{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + '--fingerprint', '--noout', '-sha1' + ]) + + pre_ans= '{}'.format(out5[17:76]) + ans5="" + + for character in pre_ans: + if(character!=':'): + ans5+=character + + str1 = 'curl' + str2 = '-k' + str3 = '-v' + str4 = '-X' + str5 = 'POST' + str6 = '-u' + str7 = '-d' + str8 = 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token/introspect' + + out6= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.aud' + ]) + + out7= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.sub' + ]) + + out8= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.azp' + ]) + + ans6=out6.rstrip() + ans7=out7.rstrip() + ans8=out8.rstrip() + + os.environ['TOKEN']=ans4 + os.environ['THUMBPRINT']=ans5 + os.environ['AUD']=ans6 + os.environ['SUB']=ans7 + os.environ['AZP']=ans8 + os.environ['USER_TOKEN']=user_token + os.environ['KC_REALM']=realm_name + + try: + yield + finally: + log.info('Removing certificate.crt file...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=['rm', '-f', + '{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + ], + ) + + remote.run( + args=['rm', '-f', + '{tdir}/confi.py'.format(tdir=teuthology.get_testdir(ctx)), + ], + ) + +@contextlib.contextmanager +def task(ctx,config): + """ + To run keycloak the prerequisite is to run the tox task. Following is the way how to run + tox and then keycloak:: + + tasks: + - tox: [ client.0 ] + - keycloak: + client.0: + keycloak_version: 11.0.0 + + To pass extra arguments to nose (e.g. 
to run a certain test):: + + tasks: + - tox: [ client.0 ] + - keycloak: + client.0: + keycloak_version: 11.0.0 + - s3tests: + client.0: + extra_attrs: ['webidentity_test'] + + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task keycloak only supports a list or dictionary for configuration" + + if not hasattr(ctx, 'tox'): + raise ConfigError('keycloak must run after the tox task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Keycloak config is %s', config) + + with contextutil.nested( + lambda: install_packages(ctx=ctx, config=config), + lambda: build(ctx=ctx, config=config), + lambda: run_keycloak(ctx=ctx, config=config), + lambda: download_conf(ctx=ctx, config=config), + lambda: run_admin_cmds(ctx=ctx, config=config), + ): + yield + diff --git a/qa/tasks/keystone.py b/qa/tasks/keystone.py new file mode 100644 index 000000000..7aa785055 --- /dev/null +++ b/qa/tasks/keystone.py @@ -0,0 +1,481 @@ +""" +Deploy and configure Keystone for Teuthology +""" +import argparse +import contextlib +import logging + +# still need this for python3.6 +from collections import OrderedDict +from itertools import chain + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.packaging import install_package +from teuthology.packaging import remove_package +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + + +def get_keystone_dir(ctx): + return '{tdir}/keystone'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_keystone_dir(ctx, client, args, **kwargs): + return ctx.cluster.only(client).run( + args=[ 'cd', get_keystone_dir(ctx), run.Raw('&&'), ] + args, + **kwargs + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +def run_in_keystone_venv(ctx, client, args): + run_in_keystone_dir(ctx, client, + [ 'source', + '.tox/venv/bin/activate', + run.Raw('&&') + ] + args) + +def get_keystone_venved_cmd(ctx, cmd, args, env=[]): + kbindir = get_keystone_dir(ctx) + '/.tox/venv/bin/' + return env + [ kbindir + 'python', kbindir + cmd ] + args + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Keystone from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. 
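+
+    Each client's config may provide 'force-branch' (defaults to master) and
+    an optional 'sha1' to pin the checkout; both are read below.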
+ """ + assert isinstance(config, dict) + log.info('Downloading keystone...') + keystonedir = get_keystone_dir(ctx) + + for (client, cconf) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', cconf.get('force-branch', 'master'), + 'https://github.com/openstack/keystone.git', + keystonedir, + ], + ) + + sha1 = cconf.get('sha1') + if sha1 is not None: + run_in_keystone_dir(ctx, client, [ + 'git', 'reset', '--hard', sha1, + ], + ) + + # hax for http://tracker.ceph.com/issues/23659 + run_in_keystone_dir(ctx, client, [ + 'sed', '-i', + 's/pysaml2<4.0.3,>=2.4.0/pysaml2>=4.5.0/', + 'requirements.txt' + ], + ) + try: + yield + finally: + log.info('Removing keystone...') + for client in config: + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', keystonedir ], + ) + +patch_bindep_template = """\ +import fileinput +import sys +import os +fixed=False +os.chdir("{keystone_dir}") +for line in fileinput.input("bindep.txt", inplace=True): + if line == "python34-devel [platform:centos]\\n": + line="python34-devel [platform:centos-7]\\npython36-devel [platform:centos-8]\\n" + fixed=True + print(line,end="") + +print("Fixed line" if fixed else "No fix necessary", file=sys.stderr) +exit(0) +""" + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Download the packaged dependencies of Keystone. + Remove install packages upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Installing packages for Keystone...') + + patch_bindep = patch_bindep_template \ + .replace("{keystone_dir}", get_keystone_dir(ctx)) + packages = {} + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, ['python'], stdin=patch_bindep) + # use bindep to read which dependencies we need from keystone/bindep.txt + toxvenv_sh(ctx, remote, ['pip', 'install', 'bindep']) + packages[client] = toxvenv_sh(ctx, remote, + ['bindep', '--brief', '--file', '{}/bindep.txt'.format(get_keystone_dir(ctx))], + check_status=False).splitlines() # returns 1 on success? + for dep in packages[client]: + install_package(dep, remote) + try: + yield + finally: + log.info('Removing packaged dependencies of Keystone...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for dep in packages[client]: + remove_package(dep, remote) + +def run_mysql_query(ctx, remote, query): + query_arg = '--execute="{}"'.format(query) + args = ['sudo', 'mysql', run.Raw(query_arg)] + remote.run(args=args) + +@contextlib.contextmanager +def setup_database(ctx, config): + """ + Setup database for Keystone. + """ + assert isinstance(config, dict) + log.info('Setting up database for keystone...') + + for (client, cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + # MariaDB on RHEL/CentOS needs service started after package install + # while Ubuntu starts service by default. 
+ if remote.os.name == 'rhel' or remote.os.name == 'centos': + remote.run(args=['sudo', 'systemctl', 'restart', 'mariadb']) + + run_mysql_query(ctx, remote, "CREATE USER 'keystone'@'localhost' IDENTIFIED BY 'SECRET';") + run_mysql_query(ctx, remote, "CREATE DATABASE keystone;") + run_mysql_query(ctx, remote, "GRANT ALL PRIVILEGES ON keystone.* TO 'keystone'@'localhost';") + run_mysql_query(ctx, remote, "FLUSH PRIVILEGES;") + + try: + yield + finally: + pass + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Keystone using tox. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for keystone...') + for (client, _) in config.items(): + run_in_keystone_dir(ctx, client, + ['sed', '-i', 's/usedevelop.*/usedevelop=false/g', 'tox.ini']) + + run_in_keystone_dir(ctx, client, + [ 'source', + '{tvdir}/bin/activate'.format(tvdir=get_toxvenv_dir(ctx)), + run.Raw('&&'), + 'tox', '-e', 'venv', '--notest' + ]) + + run_in_keystone_venv(ctx, client, + [ 'pip', 'install', + 'python-openstackclient==5.2.1', + 'osc-lib==2.0.0' + ]) + try: + yield + finally: + pass + +@contextlib.contextmanager +def configure_instance(ctx, config): + assert isinstance(config, dict) + log.info('Configuring keystone...') + + kdir = get_keystone_dir(ctx) + keyrepo_dir = '{kdir}/etc/fernet-keys'.format(kdir=kdir) + for (client, _) in config.items(): + # prepare the config file + run_in_keystone_dir(ctx, client, + [ + 'source', + f'{get_toxvenv_dir(ctx)}/bin/activate', + run.Raw('&&'), + 'tox', '-e', 'genconfig' + ]) + run_in_keystone_dir(ctx, client, + [ + 'cp', '-f', + 'etc/keystone.conf.sample', + 'etc/keystone.conf' + ]) + run_in_keystone_dir(ctx, client, + [ + 'sed', + '-e', 's^#key_repository =.*^key_repository = {kr}^'.format(kr = keyrepo_dir), + '-i', 'etc/keystone.conf' + ]) + run_in_keystone_dir(ctx, client, + [ + 'sed', + '-e', 's^#connection =.*^connection = mysql+pymysql://keystone:SECRET@localhost/keystone^', + '-i', 'etc/keystone.conf' + ]) + # log to a file that gets archived + log_file = '{p}/archive/keystone.{c}.log'.format(p=teuthology.get_testdir(ctx), c=client) + run_in_keystone_dir(ctx, client, + [ + 'sed', + '-e', 's^#log_file =.*^log_file = {}^'.format(log_file), + '-i', 'etc/keystone.conf' + ]) + # copy the config to archive + run_in_keystone_dir(ctx, client, [ + 'cp', 'etc/keystone.conf', + '{}/archive/keystone.{}.conf'.format(teuthology.get_testdir(ctx), client) + ]) + + conf_file = '{kdir}/etc/keystone.conf'.format(kdir=get_keystone_dir(ctx)) + + # prepare key repository for Fetnet token authenticator + run_in_keystone_dir(ctx, client, [ 'mkdir', '-p', keyrepo_dir ]) + run_in_keystone_venv(ctx, client, [ 'keystone-manage', '--config-file', conf_file, 'fernet_setup' ]) + + # sync database + run_in_keystone_venv(ctx, client, [ 'keystone-manage', '--config-file', conf_file, 'db_sync' ]) + yield + +@contextlib.contextmanager +def run_keystone(ctx, config): + assert isinstance(config, dict) + log.info('Configuring keystone...') + + conf_file = '{kdir}/etc/keystone.conf'.format(kdir=get_keystone_dir(ctx)) + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'keystone.public' + '.' 
+ client_id + + public_host, public_port = ctx.keystone.public_endpoints[client] + run_cmd = get_keystone_venved_cmd(ctx, 'keystone-wsgi-public', + [ '--host', public_host, '--port', str(public_port), + # Let's put the Keystone in background, wait for EOF + # and after receiving it, send SIGTERM to the daemon. + # This crazy hack is because Keystone, in contrast to + # our other daemons, doesn't quit on stdin.close(). + # Teuthology relies on this behaviour. + run.Raw('& { read; kill %1; }') + ], + [ + run.Raw('OS_KEYSTONE_CONFIG_FILES={}'.format(conf_file)), + ], + ) + ctx.daemons.add_daemon( + remote, 'keystone', client_public_with_id, + cluster=cluster_name, + args=run_cmd, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + check_status=False, + ) + + # sleep driven synchronization + run_in_keystone_venv(ctx, client, [ 'sleep', '15' ]) + try: + yield + finally: + log.info('Stopping Keystone public instance') + ctx.daemons.get_daemon('keystone', client_public_with_id, + cluster_name).stop() + + +def dict_to_args(specials, items): + """ + Transform + [(key1, val1), (special, val_special), (key3, val3) ] + into: + [ '--key1', 'val1', '--key3', 'val3', 'val_special' ] + """ + args = [] + special_vals = OrderedDict((k, '') for k in specials.split(',')) + for (k, v) in items: + if k in special_vals: + special_vals[k] = v + else: + args.append('--{k}'.format(k=k)) + args.append(v) + args.extend(arg for arg in special_vals.values() if arg) + return args + +def run_section_cmds(ctx, cclient, section_cmd, specials, + section_config_list): + public_host, public_port = ctx.keystone.public_endpoints[cclient] + + auth_section = [ + ( 'os-username', 'admin' ), + ( 'os-password', 'ADMIN' ), + ( 'os-user-domain-id', 'default' ), + ( 'os-project-name', 'admin' ), + ( 'os-project-domain-id', 'default' ), + ( 'os-identity-api-version', '3' ), + ( 'os-auth-url', 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) ), + ] + + for section_item in section_config_list: + run_in_keystone_venv(ctx, cclient, + [ 'openstack' ] + section_cmd.split() + + dict_to_args(specials, auth_section + list(section_item.items())) + + [ '--debug' ]) + +def create_endpoint(ctx, cclient, service, url, adminurl=None): + endpoint_sections = [ + {'service': service, 'interface': 'public', 'url': url}, + ] + if adminurl: + endpoint_sections.append( + {'service': service, 'interface': 'admin', 'url': adminurl} + ) + run_section_cmds(ctx, cclient, 'endpoint create', + 'service,interface,url', + endpoint_sections) + +@contextlib.contextmanager +def fill_keystone(ctx, config): + assert isinstance(config, dict) + + for (cclient, cconfig) in config.items(): + public_host, public_port = ctx.keystone.public_endpoints[cclient] + url = 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) + opts = {'password': 'ADMIN', + 'region-id': 'RegionOne', + 'internal-url': url, + 'admin-url': url, + 'public-url': url} + bootstrap_args = chain.from_iterable(('--bootstrap-{}'.format(k), v) + for k, v in opts.items()) + conf_file = '{kdir}/etc/keystone.conf'.format(kdir=get_keystone_dir(ctx)) + run_in_keystone_venv(ctx, cclient, + ['keystone-manage', '--config-file', conf_file, 'bootstrap'] + + list(bootstrap_args)) + + # configure tenants/projects + run_section_cmds(ctx, cclient, 'domain create --or-show', 'name', + cconfig.get('domains', [])) + run_section_cmds(ctx, cclient, 'project create --or-show', 'name', + cconfig.get('projects', [])) + run_section_cmds(ctx, cclient, 'user create --or-show', 'name', 
+ cconfig.get('users', [])) + run_section_cmds(ctx, cclient, 'role create --or-show', 'name', + cconfig.get('roles', [])) + run_section_cmds(ctx, cclient, 'role add', 'name', + cconfig.get('role-mappings', [])) + run_section_cmds(ctx, cclient, 'service create', 'type', + cconfig.get('services', [])) + + # for the deferred endpoint creation; currently it's used in rgw.py + ctx.keystone.create_endpoint = create_endpoint + + # sleep driven synchronization -- just in case + run_in_keystone_venv(ctx, cclient, [ 'sleep', '3' ]) + try: + yield + finally: + pass + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure Keystone + + Example of configuration: + + - install: + - ceph: + - tox: [ client.0 ] + - keystone: + client.0: + force-branch: master + domains: + - name: custom + description: Custom domain + projects: + - name: custom + description: Custom project + users: + - name: custom + password: SECRET + project: custom + roles: [ name: custom ] + role-mappings: + - name: custom + user: custom + project: custom + services: + - name: swift + type: object-store + description: Swift Service + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task keystone only supports a list or dictionary for configuration" + + if not hasattr(ctx, 'tox'): + raise ConfigError('keystone must run after the tox task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Keystone config is %s', config) + + ctx.keystone = argparse.Namespace() + ctx.keystone.public_endpoints = assign_ports(ctx, config, 5000) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: install_packages(ctx=ctx, config=config), + lambda: setup_database(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_instance(ctx=ctx, config=config), + lambda: run_keystone(ctx=ctx, config=config), + lambda: fill_keystone(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/kubeadm.py b/qa/tasks/kubeadm.py new file mode 100644 index 000000000..00417fc86 --- /dev/null +++ b/qa/tasks/kubeadm.py @@ -0,0 +1,563 @@ +""" +Kubernetes cluster task, deployed via kubeadm +""" +import argparse +import contextlib +import ipaddress +import json +import logging +import random +import yaml +from io import BytesIO + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def _kubectl(ctx, config, args, **kwargs): + cluster_name = config['cluster'] + ctx.kubeadm[cluster_name].bootstrap_remote.run( + args=['kubectl'] + args, + **kwargs, + ) + + +def kubectl(ctx, config): + if isinstance(config, str): + config = [config] + assert isinstance(config, list) + for c in config: + if isinstance(c, str): + _kubectl(ctx, config, c.split(' ')) + else: + _kubectl(ctx, config, c) + + +@contextlib.contextmanager +def preflight(ctx, config): + run.wait( + 
ctx.cluster.run( + args=[ + 'sudo', 'modprobe', 'br_netfilter', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.bridge.bridge-nf-call-ip6tables=1', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.bridge.bridge-nf-call-iptables=1', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.ipv4.ip_forward=1', + run.Raw('&&'), + 'sudo', 'swapoff', '-a', + ], + wait=False, + ) + ) + + # set docker cgroup driver = systemd + # see https://kubernetes.io/docs/setup/production-environment/container-runtimes/#docker + # see https://github.com/kubernetes/kubeadm/issues/2066 + for remote in ctx.cluster.remotes.keys(): + try: + orig = remote.read_file('/etc/docker/daemon.json', sudo=True) + j = json.loads(orig) + except Exception as e: + log.info(f'Failed to pull old daemon.json: {e}') + j = {} + j["exec-opts"] = ["native.cgroupdriver=systemd"] + j["log-driver"] = "json-file" + j["log-opts"] = {"max-size": "100m"} + j["storage-driver"] = "overlay2" + remote.write_file('/etc/docker/daemon.json', json.dumps(j), sudo=True) + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'restart', 'docker', + run.Raw('||'), + 'true', + ], + wait=False, + ) + ) + yield + + +@contextlib.contextmanager +def kubeadm_install(ctx, config): + version = config.get('version', '1.21') + + os_type = teuthology.get_distro(ctx) + os_version = teuthology.get_distro_version(ctx) + + try: + if os_type in ['centos', 'rhel']: + os = f"CentOS_{os_version.split('.')[0]}" + log.info('Installing cri-o') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'curl', '-L', '-o', + '/etc/yum.repos.d/devel:kubic:libcontainers:stable.repo', + f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/{os}/devel:kubic:libcontainers:stable.repo', + run.Raw('&&'), + 'sudo', + 'curl', '-L', '-o', + f'/etc/yum.repos.d/devel:kubic:libcontainers:stable:cri-o:{version}.repo', + f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/{version}/{os}/devel:kubic:libcontainers:stable:cri-o:{version}.repo', + run.Raw('&&'), + 'sudo', 'dnf', 'install', '-y', 'cri-o', + ], + wait=False, + ) + ) + + log.info('Installing kube{adm,ctl,let}') + repo = """[kubernetes] +name=Kubernetes +baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$basearch +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg +""" + for remote in ctx.cluster.remotes.keys(): + remote.write_file( + '/etc/yum.repos.d/kubernetes.repo', + repo, + sudo=True, + ) + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'dnf', 'install', '-y', + 'kubelet', 'kubeadm', 'kubectl', + 'iproute-tc', 'bridge-utils', + ], + wait=False, + ) + ) + + # fix cni config + for remote in ctx.cluster.remotes.keys(): + conf = """# from https://github.com/cri-o/cri-o/blob/master/tutorials/kubernetes.md#flannel-network +{ + "name": "crio", + "type": "flannel" +} +""" + remote.write_file('/etc/cni/net.d/10-crio-flannel.conf', conf, sudo=True) + remote.run(args=[ + 'sudo', 'rm', '-f', + '/etc/cni/net.d/87-podman-bridge.conflist', + '/etc/cni/net.d/100-crio-bridge.conf', + ]) + + # start crio + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'daemon-reload', + run.Raw('&&'), + 'sudo', 'systemctl', 'enable', 'crio', '--now', + ], + wait=False, + ) + ) + + elif os_type == 'ubuntu': + os = f"xUbuntu_{os_version}" + log.info('Installing kube{adm,ctl,let}') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'apt', 'update', + run.Raw('&&'), + 'sudo', 'apt', 
'install', '-y', + 'apt-transport-https', 'ca-certificates', 'curl', + run.Raw('&&'), + 'sudo', 'curl', '-fsSLo', + '/usr/share/keyrings/kubernetes-archive-keyring.gpg', + 'https://packages.cloud.google.com/apt/doc/apt-key.gpg', + run.Raw('&&'), + 'echo', 'deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main', + run.Raw('|'), + 'sudo', 'tee', '/etc/apt/sources.list.d/kubernetes.list', + run.Raw('&&'), + 'sudo', 'apt', 'update', + run.Raw('&&'), + 'sudo', 'apt', 'install', '-y', + 'kubelet', 'kubeadm', 'kubectl', + 'bridge-utils', + ], + wait=False, + ) + ) + + else: + raise RuntimeError(f'unsupported distro {os_type} for cri-o') + + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'enable', '--now', 'kubelet', + run.Raw('&&'), + 'sudo', 'kubeadm', 'config', 'images', 'pull', + ], + wait=False, + ) + ) + + yield + + finally: + if config.get('uninstall', True): + log.info('Uninstalling kube{adm,let,ctl}') + if os_type in ['centos', 'rhel']: + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-f', + '/etc/yum.repos.d/kubernetes.repo', + run.Raw('&&'), + 'sudo', 'dnf', 'remove', '-y', + 'kubeadm', 'kubelet', 'kubectl', 'cri-o', + ], + wait=False + ) + ) + elif os_type == 'ubuntu' and False: + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-f', + '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list', + f'/etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:{version}.list', + '/etc/apt/trusted.gpg.d/libcontainers-cri-o.gpg', + run.Raw('&&'), + 'sudo', 'apt', 'remove', '-y', + 'kkubeadm', 'kubelet', 'kubectl', 'cri-o', 'cri-o-runc', + ], + wait=False, + ) + ) + + +@contextlib.contextmanager +def kubeadm_init_join(ctx, config): + cluster_name = config['cluster'] + + bootstrap_remote = None + remotes = {} # remote -> ip + for remote, roles in ctx.cluster.remotes.items(): + for role in roles: + if role.startswith('host.'): + if not bootstrap_remote: + bootstrap_remote = remote + if remote not in remotes: + remotes[remote] = remote.ssh.get_transport().getpeername()[0] + if not bootstrap_remote: + raise RuntimeError('must define at least one host.something role') + ctx.kubeadm[cluster_name].bootstrap_remote = bootstrap_remote + ctx.kubeadm[cluster_name].remotes = remotes + ctx.kubeadm[cluster_name].token = 'abcdef.' 
+ ''.join([ + random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for _ in range(16) + ]) + log.info(f'Token: {ctx.kubeadm[cluster_name].token}') + log.info(f'Remotes: {ctx.kubeadm[cluster_name].remotes}') + + try: + # init + cmd = [ + 'sudo', 'kubeadm', 'init', + '--node-name', ctx.kubeadm[cluster_name].bootstrap_remote.shortname, + '--token', ctx.kubeadm[cluster_name].token, + '--pod-network-cidr', str(ctx.kubeadm[cluster_name].pod_subnet), + ] + bootstrap_remote.run(args=cmd) + + # join additional nodes + joins = [] + for remote, ip in ctx.kubeadm[cluster_name].remotes.items(): + if remote == bootstrap_remote: + continue + cmd = [ + 'sudo', 'kubeadm', 'join', + ctx.kubeadm[cluster_name].remotes[ctx.kubeadm[cluster_name].bootstrap_remote] + ':6443', + '--node-name', remote.shortname, + '--token', ctx.kubeadm[cluster_name].token, + '--discovery-token-unsafe-skip-ca-verification', + ] + joins.append(remote.run(args=cmd, wait=False)) + run.wait(joins) + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up node') + run.wait( + ctx.cluster.run( + args=['sudo', 'kubeadm', 'reset', 'cleanup-node', '-f'], + wait=False, + ) + ) + + +@contextlib.contextmanager +def kubectl_config(ctx, config): + cluster_name = config['cluster'] + bootstrap_remote = ctx.kubeadm[cluster_name].bootstrap_remote + + ctx.kubeadm[cluster_name].admin_conf = \ + bootstrap_remote.read_file('/etc/kubernetes/admin.conf', sudo=True) + + log.info('Setting up kubectl') + try: + ctx.cluster.run(args=[ + 'mkdir', '-p', '.kube', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', '/root/.kube', + ]) + for remote in ctx.kubeadm[cluster_name].remotes.keys(): + remote.write_file('.kube/config', ctx.kubeadm[cluster_name].admin_conf) + remote.sudo_write_file('/root/.kube/config', + ctx.kubeadm[cluster_name].admin_conf) + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Deconfiguring kubectl') + ctx.cluster.run(args=[ + 'rm', '-rf', '.kube', + run.Raw('&&'), + 'sudo', 'rm', '-rf', '/root/.kube', + ]) + + +def map_vnet(mip): + for mapping in teuth_config.get('vnet', []): + mnet = ipaddress.ip_network(mapping['machine_subnet']) + vnet = ipaddress.ip_network(mapping['virtual_subnet']) + if vnet.prefixlen >= mnet.prefixlen: + log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix") + return None + if mip in mnet: + pos = list(mnet.hosts()).index(mip) + log.info(f"{mip} is in {mnet} at pos {pos}") + sub = list(vnet.subnets(32 - mnet.prefixlen))[pos] + return sub + return None + + +@contextlib.contextmanager +def allocate_pod_subnet(ctx, config): + """ + Allocate a private subnet that will not collide with other test machines/clusters + """ + cluster_name = config['cluster'] + assert cluster_name == 'kubeadm', 'multiple subnets not yet implemented' + + log.info('Identifying pod subnet') + remote = list(ctx.cluster.remotes.keys())[0] + ip = remote.ssh.get_transport().getpeername()[0] + mip = ipaddress.ip_address(ip) + vnet = map_vnet(mip) + assert vnet + log.info(f'Pod subnet: {vnet}') + ctx.kubeadm[cluster_name].pod_subnet = vnet + yield + + +@contextlib.contextmanager +def pod_network(ctx, config): + cluster_name = config['cluster'] + pnet = config.get('pod_network', 'calico') + if pnet == 'flannel': + r = ctx.kubeadm[cluster_name].bootstrap_remote.run( + args=[ + 'curl', + 'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml', + ], + stdout=BytesIO(), + ) + assert r.exitstatus == 0 + flannel = 
list(yaml.load_all(r.stdout.getvalue(), Loader=yaml.FullLoader)) + for o in flannel: + if o.get('data', {}).get('net-conf.json'): + log.info(f'Updating {o}') + o['data']['net-conf.json'] = o['data']['net-conf.json'].replace( + '10.244.0.0/16', + str(ctx.kubeadm[cluster_name].pod_subnet) + ) + log.info(f'Now {o}') + flannel_yaml = yaml.dump_all(flannel) + log.debug(f'Flannel:\n{flannel_yaml}') + _kubectl(ctx, config, ['apply', '-f', '-'], stdin=flannel_yaml) + + elif pnet == 'calico': + _kubectl(ctx, config, [ + 'create', '-f', + 'https://docs.projectcalico.org/manifests/tigera-operator.yaml' + ]) + cr = { + 'apiVersion': 'operator.tigera.io/v1', + 'kind': 'Installation', + 'metadata': {'name': 'default'}, + 'spec': { + 'calicoNetwork': { + 'ipPools': [ + { + 'blockSize': 26, + 'cidr': str(ctx.kubeadm[cluster_name].pod_subnet), + 'encapsulation': 'IPIPCrossSubnet', + 'natOutgoing': 'Enabled', + 'nodeSelector': 'all()', + } + ] + } + } + } + _kubectl(ctx, config, ['create', '-f', '-'], stdin=yaml.dump(cr)) + + else: + raise RuntimeError(f'unrecognized pod_network {pnet}') + + try: + yield + + finally: + if pnet == 'flannel': + _kubectl(ctx, config, [ + 'delete', '-f', + 'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml', + ]) + + elif pnet == 'calico': + _kubectl(ctx, config, ['delete', 'installation', 'default']) + _kubectl(ctx, config, [ + 'delete', '-f', + 'https://docs.projectcalico.org/manifests/tigera-operator.yaml' + ]) + + +@contextlib.contextmanager +def setup_pvs(ctx, config): + """ + Create PVs for all scratch LVs and set up a trivial provisioner + """ + log.info('Scanning for scratch devices') + crs = [] + for remote in ctx.cluster.remotes.keys(): + ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines() + log.info(f'Scratch devices on {remote.shortname}: {ls}') + for dev in ls: + devname = dev.split('/')[-1].replace("_", "-") + crs.append({ + 'apiVersion': 'v1', + 'kind': 'PersistentVolume', + 'metadata': {'name': f'{remote.shortname}-{devname}'}, + 'spec': { + 'volumeMode': 'Block', + 'accessModes': ['ReadWriteOnce'], + 'capacity': {'storage': '100Gi'}, # doesn't matter? 
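+                    # (the capacity above is only nominal; each PV is backed by
+                    #  a whole scratch block device listed in /scratch_devs)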
+ 'persistentVolumeReclaimPolicy': 'Retain', + 'storageClassName': 'scratch', + 'local': {'path': dev}, + 'nodeAffinity': { + 'required': { + 'nodeSelectorTerms': [ + { + 'matchExpressions': [ + { + 'key': 'kubernetes.io/hostname', + 'operator': 'In', + 'values': [remote.shortname] + } + ] + } + ] + } + } + } + }) + # overwriting first few MB is enough to make k8s happy + remote.run(args=[ + 'sudo', 'dd', 'if=/dev/zero', f'of={dev}', 'bs=1M', 'count=10' + ]) + crs.append({ + 'kind': 'StorageClass', + 'apiVersion': 'storage.k8s.io/v1', + 'metadata': {'name': 'scratch'}, + 'provisioner': 'kubernetes.io/no-provisioner', + 'volumeBindingMode': 'WaitForFirstConsumer', + }) + y = yaml.dump_all(crs) + log.info('Creating PVs + StorageClass') + log.debug(y) + _kubectl(ctx, config, ['create', '-f', '-'], stdin=y) + + yield + + +@contextlib.contextmanager +def final(ctx, config): + cluster_name = config['cluster'] + + # remove master node taint + _kubectl(ctx, config, [ + 'taint', 'node', + ctx.kubeadm[cluster_name].bootstrap_remote.shortname, + 'node-role.kubernetes.io/master-', + run.Raw('||'), + 'true', + ]) + + yield + + +@contextlib.contextmanager +def task(ctx, config): + if not config: + config = {} + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + log.info('Kubeadm start') + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('kubeadm', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'kubeadm'): + ctx.kubeadm = {} + if 'cluster' not in config: + config['cluster'] = 'kubeadm' + cluster_name = config['cluster'] + if cluster_name not in ctx.kubeadm: + ctx.kubeadm[cluster_name] = argparse.Namespace() + + with contextutil.nested( + lambda: preflight(ctx, config), + lambda: allocate_pod_subnet(ctx, config), + lambda: kubeadm_install(ctx, config), + lambda: kubeadm_init_join(ctx, config), + lambda: kubectl_config(ctx, config), + lambda: pod_network(ctx, config), + lambda: setup_pvs(ctx, config), + lambda: final(ctx, config), + ): + try: + log.info('Kubeadm complete, yielding') + yield + + finally: + log.info('Tearing down kubeadm') diff --git a/qa/tasks/locktest.py b/qa/tasks/locktest.py new file mode 100755 index 000000000..9de5ba40c --- /dev/null +++ b/qa/tasks/locktest.py @@ -0,0 +1,134 @@ +""" +locktests +""" +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run locktests, from the xfstests suite, on the given + clients. Whether the clients are ceph-fuse or kernel does not + matter, and the two clients can refer to the same mount. + + The config is a list of two clients to run the locktest on. The + first client will be the host. + + For example: + tasks: + - ceph: + - ceph-fuse: [client.0, client.1] + - locktest: + [client.0, client.1] + + This task does not yield; there would be little point. 
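+
+    The host side listens on port 6788 and the client side connects to the
+    host's address; both run the same locktest binary built below.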
+ + :param ctx: Context + :param config: Configuration + """ + + assert isinstance(config, list) + log.info('fetching and building locktests...') + (host,) = ctx.cluster.only(config[0]).remotes + (client,) = ctx.cluster.only(config[1]).remotes + ( _, _, host_id) = config[0].partition('.') + ( _, _, client_id) = config[1].partition('.') + testdir = teuthology.get_testdir(ctx) + hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id) + clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id) + + try: + for client_name in config: + log.info('building on {client_}'.format(client_=client_name)) + ctx.cluster.only(client_name).run( + args=[ + # explicitly does not support multiple autotest tasks + # in a single run; the result archival would conflict + 'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'mkdir', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'wget', + '-nv', + 'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c', + '-O', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'g++', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + '-o', '{tdir}/locktest/locktest'.format(tdir=testdir) + ], + logger=log.getChild('locktest_client.{id}'.format(id=client_name)), + ) + + log.info('built locktest on each client') + + host.run(args=['sudo', 'touch', + '{mnt}/locktestfile'.format(mnt=hostmnt), + run.Raw('&&'), + 'sudo', 'chown', 'ubuntu.ubuntu', + '{mnt}/locktestfile'.format(mnt=hostmnt) + ] + ) + + log.info('starting on host') + hostproc = host.run( + args=[ + '{tdir}/locktest/locktest'.format(tdir=testdir), + '-p', '6788', + '-d', + '{mnt}/locktestfile'.format(mnt=hostmnt), + ], + wait=False, + logger=log.getChild('locktest.host'), + ) + log.info('starting on client') + (_,_,hostaddr) = host.name.partition('@') + clientproc = client.run( + args=[ + '{tdir}/locktest/locktest'.format(tdir=testdir), + '-p', '6788', + '-d', + '-h', hostaddr, + '{mnt}/locktestfile'.format(mnt=clientmnt), + ], + logger=log.getChild('locktest.client'), + wait=False + ) + + hostresult = hostproc.wait() + clientresult = clientproc.wait() + if (hostresult != 0) or (clientresult != 0): + raise Exception("Did not pass locking test!") + log.info('finished locktest executable with results {r} and {s}'. 
\ + format(r=hostresult, s=clientresult)) + + finally: + log.info('cleaning up host dir') + host.run( + args=[ + 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rmdir', '{tdir}/locktest' + ], + logger=log.getChild('.{id}'.format(id=config[0])), + ) + log.info('cleaning up client dir') + client.run( + args=[ + 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rmdir', '{tdir}/locktest'.format(tdir=testdir) + ], + logger=log.getChild('.{id}'.format(\ + id=config[1])), + ) diff --git a/qa/tasks/logrotate.conf b/qa/tasks/logrotate.conf new file mode 100644 index 000000000..b0cb8012f --- /dev/null +++ b/qa/tasks/logrotate.conf @@ -0,0 +1,13 @@ +/var/log/ceph/*{daemon_type}*.log {{ + rotate 100 + size {max_size} + compress + sharedscripts + postrotate + killall {daemon_type} -1 || true + endscript + missingok + notifempty + su root root +}} + diff --git a/qa/tasks/lost_unfound.py b/qa/tasks/lost_unfound.py new file mode 100644 index 000000000..5a9142a70 --- /dev/null +++ b/qa/tasks/lost_unfound.py @@ -0,0 +1,180 @@ +""" +Lost_unfound +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects. + + A pretty rigid cluster is brought up and tested by this task + """ + POOL = 'unfound_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... 
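+    # (done by appending --osd-recovery-delay-start to osd.0's command line
+    #  before reviving it, so the osd peers but does not recover the new data)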
+ log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.flush_pg_stats([1, 0]) + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recovery fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , POOL, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + assert m['available_might_have_unfound'] == True + assert m['might_have_unfound'][0]['osd'] == "1" + assert m['might_have_unfound'][0]['status'] == "osd is down" + num_unfound=0 + for o in m['objects']: + if len(o['locations']) == 0: + num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'revert') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2]) + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert not err + + # see if osd.1 can cope + manager.mark_in_osd(1) + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() diff --git a/qa/tasks/manypools.py b/qa/tasks/manypools.py new file mode 100644 index 000000000..7fe7e43e1 --- /dev/null +++ b/qa/tasks/manypools.py @@ -0,0 +1,73 @@ +""" +Force pg creation on all osds +""" +from teuthology import misc as teuthology +from teuthology.orchestra import run +import logging + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Create the specified number of pools and write 16 objects to them (thereby 
forcing + the PG creation on each OSD). This task creates pools from all the clients, + in parallel. It is easy to add other daemon types which have the appropriate + permissions, but I don't think anything else does. + The config is just the number of pools to create. I recommend setting + "mon create pg interval" to a very low value in your ceph config to speed + this up. + + You probably want to do this to look at memory consumption, and + maybe to test how performance changes with the number of PGs. For example: + + tasks: + - ceph: + config: + mon: + mon create pg interval: 1 + - manypools: 3000 + - radosbench: + clients: [client.0] + time: 360 + """ + + log.info('creating {n} pools'.format(n=config)) + + poolnum = int(config) + creator_remotes = [] + client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client') + log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles)) + for role in client_roles: + log.info('role={role_}'.format(role_=role)) + (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.keys() + creator_remotes.append((creator_remote, 'client.{id}'.format(id=role))) + + remaining_pools = poolnum + poolprocs=dict() + while (remaining_pools > 0): + log.info('{n} pools remaining to create'.format(n=remaining_pools)) + for remote, role_ in creator_remotes: + poolnum = remaining_pools + remaining_pools -= 1 + if remaining_pools < 0: + continue + log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_)) + proc = remote.run( + args=[ + 'ceph', + '--name', role_, + 'osd', 'pool', 'create', 'pool{num}'.format(num=poolnum), '8', + run.Raw('&&'), + 'rados', + '--name', role_, + '--pool', 'pool{num}'.format(num=poolnum), + 'bench', '0', 'write', '-t', '16', '--block-size', '1' + ], + wait = False + ) + log.info('waiting for pool and object creates') + poolprocs[remote] = proc + + run.wait(poolprocs.values()) + + log.info('created all {n} pools and wrote 16 objects to each'.format(n=poolnum)) diff --git a/qa/tasks/mds_creation_failure.py b/qa/tasks/mds_creation_failure.py new file mode 100644 index 000000000..2ab8f70dd --- /dev/null +++ b/qa/tasks/mds_creation_failure.py @@ -0,0 +1,70 @@ +# FIXME: this file has many undefined vars which are accessed! +# flake8: noqa +import logging +import contextlib +import time +from tasks import ceph_manager +from teuthology import misc +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import Raw + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Go through filesystem creation with a synthetic failure in an MDS + in its 'up:creating' state, to exercise the retry behaviour. 
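+
+    Exactly one MDS role is expected; the code path with a standby available
+    is different and is not exercised here.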
+ """ + # Grab handles to the teuthology objects of interest + mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds')) + if len(mdslist) != 1: + # Require exactly one MDS, the code path for creation failure when + # a standby is available is different + raise RuntimeError("This task requires exactly one MDS") + + mds_id = mdslist[0] + (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.keys() + manager = ceph_manager.CephManager( + mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # Stop MDS + self.fs.set_max_mds(0) + self.fs.mds_stop(mds_id) + self.fs.mds_fail(mds_id) + + # Reset the filesystem so that next start will go into CREATING + manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it") + manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data") + + # Start the MDS with mds_kill_create_at set, it will crash during creation + mds.restart_with_args(["--mds_kill_create_at=1"]) + try: + mds.wait_for_exit() + except CommandFailedError as e: + if e.exitstatus == 1: + log.info("MDS creation killed as expected") + else: + log.error("Unexpected status code %s" % e.exitstatus) + raise + + # Since I have intentionally caused a crash, I will clean up the resulting core + # file to avoid task.internal.coredump seeing it as a failure. + log.info("Removing core file from synthetic MDS failure") + mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))]) + + # It should have left the MDS map state still in CREATING + status = self.fs.status().get_mds(mds_id) + assert status['state'] == 'up:creating' + + # Start the MDS again without the kill flag set, it should proceed with creation successfully + mds.restart() + + # Wait for state ACTIVE + self.fs.wait_for_state("up:active", timeout=120, mds_id=mds_id) + + # The system should be back up in a happy healthy state, go ahead and run any further tasks + # inside this context. + yield diff --git a/qa/tasks/mds_pre_upgrade.py b/qa/tasks/mds_pre_upgrade.py new file mode 100644 index 000000000..812d402ed --- /dev/null +++ b/qa/tasks/mds_pre_upgrade.py @@ -0,0 +1,27 @@ +""" +Prepare MDS cluster for upgrade. +""" + +import logging + +from tasks.cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Prepare MDS cluster for upgrade. + + This task reduces ranks to 1 and stops all standbys. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'snap-upgrade task only accepts a dict for configuration' + + fs = Filesystem(ctx) + fs.getinfo() # load name + fs.set_allow_standby_replay(False) + fs.set_max_mds(1) + fs.reach_max_mds() diff --git a/qa/tasks/mds_thrash.py b/qa/tasks/mds_thrash.py new file mode 100644 index 000000000..7b7b420f9 --- /dev/null +++ b/qa/tasks/mds_thrash.py @@ -0,0 +1,434 @@ +""" +Thrash mds by simulating failures +""" +import logging +import contextlib +import itertools +import random +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event +from teuthology import misc as teuthology + +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster, Filesystem, FSMissing +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +class MDSThrasher(Thrasher, Greenlet): + """ + MDSThrasher:: + + The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc). + + The config is optional. 
Many of the config parameters are a a maximum value + to use when selecting a random value from a range. To always use the maximum + value, set no_random to true. The config is a dict containing some or all of: + + max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at + any given time. + + max_thrash_delay: [default: 30] maximum number of seconds to delay before + thrashing again. + + max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in + the replay state before thrashing. + + max_revive_delay: [default: 10] maximum number of seconds to delay before + bringing back a thrashed MDS. + + randomize: [default: true] enables randomization and use the max/min values + + seed: [no default] seed the random number generator + + thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed + during replay. Value should be between 0.0 and 1.0. + + thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds + cluster will be modified to a value [1, current) or (current, starting + max_mds]. Value should be between 0.0 and 1.0. + + thrash_while_stopping: [default: false] thrash an MDS while there + are MDS in up:stopping (because max_mds was changed and some + MDS were deactivated). + + thrash_weights: allows specific MDSs to be thrashed more/less frequently. + This option overrides anything specified by max_thrash. This option is a + dict containing mds.x: weight pairs. For example, [mds.a: 0.7, mds.b: + 0.3, mds.c: 0.0]. Each weight is a value from 0.0 to 1.0. Any MDSs not + specified will be automatically given a weight of 0.0 (not thrashed). + For a given MDS, by default the trasher delays for up to + max_thrash_delay, trashes, waits for the MDS to recover, and iterates. + If a non-zero weight is specified for an MDS, for each iteration the + thrasher chooses whether to thrash during that iteration based on a + random value [0-1] not exceeding the weight of that MDS. + + Examples:: + + + The following example sets the likelihood that mds.a will be thrashed + to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the + likelihood that an MDS will be thrashed in replay to 40%. + Thrash weights do not have to sum to 1. 
+ + tasks: + - ceph: + - mds_thrash: + thrash_weights: + - mds.a: 0.8 + - mds.b: 0.2 + thrash_in_replay: 0.4 + - ceph-fuse: + - workunit: + clients: + all: [suites/fsx.sh] + + The following example disables randomization, and uses the max delay values: + + tasks: + - ceph: + - mds_thrash: + max_thrash_delay: 10 + max_revive_delay: 1 + max_replay_thrash_delay: 4 + + """ + + def __init__(self, ctx, manager, config, fs, max_mds): + super(MDSThrasher, self).__init__() + + self.config = config + self.ctx = ctx + self.logger = log.getChild('fs.[{f}]'.format(f = fs.name)) + self.fs = fs + self.manager = manager + self.max_mds = max_mds + self.name = 'thrasher.fs.[{f}]'.format(f = fs.name) + self.stopping = Event() + + self.randomize = bool(self.config.get('randomize', True)) + self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05)) + self.max_thrash = int(self.config.get('max_thrash', 1)) + self.max_thrash_delay = float(self.config.get('thrash_delay', 120.0)) + self.thrash_in_replay = float(self.config.get('thrash_in_replay', False)) + assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format( + v=self.thrash_in_replay) + self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0)) + self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0)) + + def _run(self): + try: + self.do_thrash() + except FSMissing: + pass + except Exception as e: + # Log exceptions here so we get the full backtrace (gevent loses them). + # Also allow successful completion as gevent exception handling is a broken mess: + # + # 2017-02-03T14:34:01.259 CRITICAL:root: File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051) + # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error + # self.print_exception(context, type, value, tb) + # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception + # traceback.print_exception(type, value, tb, file=errstream) + # File "/usr/lib/python2.7/traceback.py", line 124, in print_exception + # _print(file, 'Traceback (most recent call last):') + # File "/usr/lib/python2.7/traceback.py", line 13, in _print + # file.write(str+terminator) + # 2017-02-03T14:34:01.261 CRITICAL:root:IOError + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... + + def log(self, x): + """Write data to the logger assigned to MDSThrasher""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def kill_mds(self, mds): + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). + remotes.keys()) + self.log('kill_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mds', mds).stop() + + @staticmethod + def _assert_ipmi(remote): + assert remote.console.has_ipmi_credentials, ( + "powercycling requested but RemoteConsole is not " + "initialized. Check ipmi config.") + + def revive_mds(self, mds): + """ + Revive mds -- do an ipmpi powercycle (if indicated by the config) + and then restart. + """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). 
+ remotes.keys()) + self.log('revive_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.manager.make_admin_daemon_dir(self.ctx, remote) + args = [] + self.ctx.daemons.get_daemon('mds', mds).restart(*args) + + def wait_for_stable(self, rank = None, gid = None): + self.log('waiting for mds cluster to stabilize...') + for itercount in itertools.count(): + status = self.fs.status() + max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds'] + ranks = list(status.get_ranks(self.fs.id)) + stopping = sum(1 for _ in ranks if "up:stopping" == _['state']) + actives = sum(1 for _ in ranks + if "up:active" == _['state'] and "laggy_since" not in _) + + if not bool(self.config.get('thrash_while_stopping', False)) and stopping > 0: + if itercount % 5 == 0: + self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)') + else: + if rank is not None: + try: + info = status.get_rank(self.fs.id, rank) + if info['gid'] != gid and "up:active" == info['state']: + self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(name = info['name'], rank = rank, gid = gid)) + return status + except: + pass # no rank present + if actives >= max_mds: + # no replacement can occur! + self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format( + actives=actives, max_mds=max_mds, rank=rank)) + return status + else: + if actives == max_mds: + self.log('mds cluster has {count} alive and active, now stable!'.format(count = actives)) + return status, None + if itercount > 300/2: # 5 minutes + raise RuntimeError('timeout waiting for cluster to stabilize') + elif itercount % 5 == 0: + self.log('mds map: {status}'.format(status=status)) + else: + self.log('no change') + sleep(2) + + def do_thrash(self): + """ + Perform the random thrashing action + """ + + self.log('starting mds_do_thrash for fs {fs}'.format(fs = self.fs.name)) + stats = { + "max_mds": 0, + "deactivate": 0, + "kill": 0, + } + + while not self.stopping.is_set(): + delay = self.max_thrash_delay + if self.randomize: + delay = random.randrange(0.0, self.max_thrash_delay) + + if delay > 0.0: + self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) + self.stopping.wait(delay) + if self.stopping.is_set(): + continue + + status = self.fs.status() + + if random.random() <= self.thrash_max_mds: + max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds'] + options = [i for i in range(1, self.max_mds + 1) if i != max_mds] + if len(options) > 0: + new_max_mds = random.choice(options) + self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds)) + self.fs.set_max_mds(new_max_mds) + stats['max_mds'] += 1 + self.wait_for_stable() + + count = 0 + for info in status.get_ranks(self.fs.id): + name = info['name'] + label = 'mds.' 
+ name + rank = info['rank'] + gid = info['gid'] + + # if thrash_weights isn't specified and we've reached max_thrash, + # we're done + count = count + 1 + if 'thrash_weights' not in self.config and count > self.max_thrash: + break + + weight = 1.0 + if 'thrash_weights' in self.config: + weight = self.config['thrash_weights'].get(label, '0.0') + skip = random.random() + if weight <= skip: + self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight)) + continue + + self.log('kill {label} (rank={rank})'.format(label=label, rank=rank)) + self.kill_mds(name) + stats['kill'] += 1 + + # wait for mon to report killed mds as crashed + last_laggy_since = None + itercount = 0 + while True: + status = self.fs.status() + info = status.get_mds(name) + if not info: + break + if 'laggy_since' in info: + last_laggy_since = info['laggy_since'] + break + if any([(f == name) for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']]): + break + self.log( + 'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format( + label=label)) + itercount = itercount + 1 + if itercount > 10: + self.log('mds map: {status}'.format(status=status)) + sleep(2) + + if last_laggy_since: + self.log( + '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since)) + else: + self.log('{label} down, removed from mdsmap'.format(label=label)) + + # wait for a standby mds to takeover and become active + status = self.wait_for_stable(rank, gid) + + # wait for a while before restarting old active to become new + # standby + delay = self.max_revive_delay + if self.randomize: + delay = random.randrange(0.0, self.max_revive_delay) + + self.log('waiting for {delay} secs before reviving {label}'.format( + delay=delay, label=label)) + sleep(delay) + + self.log('reviving {label}'.format(label=label)) + self.revive_mds(name) + + for itercount in itertools.count(): + if itercount > 300/2: # 5 minutes + raise RuntimeError('timeout waiting for MDS to revive') + status = self.fs.status() + info = status.get_mds(name) + if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'): + self.log('{label} reported in {state} state'.format(label=label, state=info['state'])) + break + self.log( + 'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(label=label)) + sleep(2) + + for stat in stats: + self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat])) + + # don't do replay thrashing right now +# for info in status.get_replays(self.fs.id): +# # this might race with replay -> active transition... +# if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay: +# delay = self.max_replay_thrash_delay +# if self.randomize: +# delay = random.randrange(0.0, self.max_replay_thrash_delay) +# sleep(delay) +# self.log('kill replaying mds.{id}'.format(id=self.to_kill)) +# self.kill_mds(self.to_kill) +# +# delay = self.max_revive_delay +# if self.randomize: +# delay = random.randrange(0.0, self.max_revive_delay) +# +# self.log('waiting for {delay} secs before reviving mds.{id}'.format( +# delay=delay, id=self.to_kill)) +# sleep(delay) +# +# self.log('revive mds.{id}'.format(id=self.to_kill)) +# self.revive_mds(self.to_kill) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the mds by thrashing while another task/workunit + is running. 
+ + Please refer to MDSThrasher class for further information on the + available options. + """ + + mds_cluster = MDSCluster(ctx) + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mds_thrash task only accepts a dict for configuration' + mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + assert len(mdslist) > 1, \ + 'mds_thrash task requires at least 2 metadata servers' + + # choose random seed + if 'seed' in config: + seed = int(config['seed']) + else: + seed = int(time.time()) + log.info('mds thrasher using random seed: {seed}'.format(seed=seed)) + random.seed(seed) + + (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys() + manager = ceph_manager.CephManager( + first, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # make sure everyone is in active, standby, or standby-replay + log.info('Wait for all MDSs to reach steady state...') + status = mds_cluster.status() + while True: + steady = True + for info in status.get_all(): + state = info['state'] + if state not in ('up:active', 'up:standby', 'up:standby-replay'): + steady = False + break + if steady: + break + sleep(2) + status = mds_cluster.status() + log.info('Ready to start thrashing') + + manager.wait_for_clean() + assert manager.is_clean() + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + for fs in status.get_filesystems(): + thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fscid=fs['id']), fs['mdsmap']['max_mds']) + thrasher.start() + ctx.ceph[config['cluster']].thrashers.append(thrasher) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining mds_thrasher') + thrasher.stop() + if thrasher.exception is not None: + raise RuntimeError('error during thrashing') + thrasher.join() + log.info('done joining') diff --git a/qa/tasks/metadata.yaml b/qa/tasks/metadata.yaml new file mode 100644 index 000000000..ccdc3b077 --- /dev/null +++ b/qa/tasks/metadata.yaml @@ -0,0 +1,2 @@ +instance-id: test +local-hostname: test diff --git a/qa/tasks/mgr/__init__.py b/qa/tasks/mgr/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/mgr/__init__.py diff --git a/qa/tasks/mgr/dashboard/__init__.py b/qa/tasks/mgr/dashboard/__init__.py new file mode 100644 index 000000000..2b022e024 --- /dev/null +++ b/qa/tasks/mgr/dashboard/__init__.py @@ -0,0 +1 @@ +DEFAULT_API_VERSION = '1.0' diff --git a/qa/tasks/mgr/dashboard/helper.py b/qa/tasks/mgr/dashboard/helper.py new file mode 100644 index 000000000..d80e238a2 --- /dev/null +++ b/qa/tasks/mgr/dashboard/helper.py @@ -0,0 +1,724 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0212,too-many-return-statements,too-many-public-methods +from __future__ import absolute_import + +import json +import logging +import random +import re +import string +import time +from collections import namedtuple +from typing import List + +import requests +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology.exceptions import \ + CommandFailedError # pylint: disable=import-error + +from . 
import DEFAULT_API_VERSION + +log = logging.getLogger(__name__) + + +class DashboardTestCase(MgrTestCase): + # Display full error diffs + maxDiff = None + + # Increased x3 (20 -> 60) + TIMEOUT_HEALTH_CLEAR = 60 + + MGRS_REQUIRED = 2 + MDSS_REQUIRED = 1 + REQUIRE_FILESYSTEM = True + CLIENTS_REQUIRED = 1 + CEPHFS = False + ORCHESTRATOR = False + ORCHESTRATOR_TEST_DATA = { + 'inventory': [ + { + 'name': 'test-host0', + 'addr': '1.2.3.4', + 'devices': [ + { + 'path': '/dev/sda', + } + ] + }, + { + 'name': 'test-host1', + 'addr': '1.2.3.5', + 'devices': [ + { + 'path': '/dev/sdb', + } + ] + } + ], + 'daemons': [ + { + 'nodename': 'test-host0', + 'daemon_type': 'mon', + 'daemon_id': 'a' + }, + { + 'nodename': 'test-host0', + 'daemon_type': 'mgr', + 'daemon_id': 'x' + }, + { + 'nodename': 'test-host0', + 'daemon_type': 'osd', + 'daemon_id': '0' + }, + { + 'nodename': 'test-host1', + 'daemon_type': 'osd', + 'daemon_id': '1' + } + ] + } + + _session = None # type: requests.sessions.Session + _token = None + _resp = None # type: requests.models.Response + _loggedin = False + _base_uri = None + + AUTO_AUTHENTICATE = True + + AUTH_ROLES = ['administrator'] + + @classmethod + def create_user(cls, username, password, roles=None, + force_password=True, cmd_args=None): + # pylint: disable=too-many-arguments + """ + :param username: The name of the user. + :type username: str + :param password: The password. + :type password: str + :param roles: A list of roles. + :type roles: list + :param force_password: Force the use of the specified password. This + will bypass the password complexity check. Defaults to 'True'. + :type force_password: bool + :param cmd_args: Additional command line arguments for the + 'ac-user-create' command. + :type cmd_args: None | list[str] + """ + try: + cls._ceph_cmd(['dashboard', 'ac-user-show', username]) + cls._ceph_cmd(['dashboard', 'ac-user-delete', username]) + except CommandFailedError as ex: + if ex.exitstatus != 2: + raise ex + + user_create_args = [ + 'dashboard', 'ac-user-create', username + ] + if force_password: + user_create_args.append('--force-password') + if cmd_args: + user_create_args.extend(cmd_args) + cls._ceph_cmd_with_secret(user_create_args, password) + if roles: + set_roles_args = ['dashboard', 'ac-user-set-roles', username] + for idx, role in enumerate(roles): + if isinstance(role, str): + set_roles_args.append(role) + else: + assert isinstance(role, dict) + rolename = 'test_role_{}'.format(idx) + try: + cls._ceph_cmd(['dashboard', 'ac-role-show', rolename]) + cls._ceph_cmd(['dashboard', 'ac-role-delete', rolename]) + except CommandFailedError as ex: + if ex.exitstatus != 2: + raise ex + cls._ceph_cmd(['dashboard', 'ac-role-create', rolename]) + for mod, perms in role.items(): + args = ['dashboard', 'ac-role-add-scope-perms', rolename, mod] + args.extend(perms) + cls._ceph_cmd(args) + set_roles_args.append(rolename) + cls._ceph_cmd(set_roles_args) + + @classmethod + def create_pool(cls, name, pg_num, pool_type, application='rbd'): + data = { + 'pool': name, + 'pg_num': pg_num, + 'pool_type': pool_type, + 'application_metadata': [application] + } + if pool_type == 'erasure': + data['flags'] = ['ec_overwrites'] + cls._task_post("/api/pool", data) + + @classmethod + def login(cls, username, password, set_cookies=False): + if cls._loggedin: + cls.logout() + cls._post('/api/auth', {'username': username, + 'password': password}, set_cookies=set_cookies) + cls._assertEq(cls._resp.status_code, 201) + cls._token = cls.jsonBody()['token'] + cls._loggedin = True + 
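+ # Illustrative usage sketch (assumes the 'admin'/'admin' user created in
+ # setUpClass below): once login() has stored cls._token, _request() attaches
+ # it to every call, either as an "Authorization: Bearer <token>" header by
+ # default or as a "token" cookie when set_cookies=True, so a test can simply
+ # do:
+ #
+ #     cls.login('admin', 'admin')
+ #     cls._get('/api/summary')   # sent with the JWT attached
+ #     cls.logout()
+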
+ @classmethod + def logout(cls, set_cookies=False): + if cls._loggedin: + cls._post('/api/auth/logout', set_cookies=set_cookies) + cls._assertEq(cls._resp.status_code, 200) + cls._token = None + cls._loggedin = False + + @classmethod + def delete_user(cls, username, roles=None): + if roles is None: + roles = [] + cls._ceph_cmd(['dashboard', 'ac-user-delete', username]) + for idx, role in enumerate(roles): + if isinstance(role, dict): + cls._ceph_cmd(['dashboard', 'ac-role-delete', 'test_role_{}'.format(idx)]) + + @classmethod + def RunAs(cls, username, password, roles=None, force_password=True, + cmd_args=None, login=True): + # pylint: disable=too-many-arguments + def wrapper(func): + def execute(self, *args, **kwargs): + self.create_user(username, password, roles, + force_password, cmd_args) + if login: + self.login(username, password) + res = func(self, *args, **kwargs) + if login: + self.logout() + self.delete_user(username, roles) + return res + + return execute + + return wrapper + + @classmethod + def set_jwt_token(cls, token): + cls._token = token + + @classmethod + def setUpClass(cls): + super(DashboardTestCase, cls).setUpClass() + cls._assign_ports("dashboard", "ssl_server_port") + cls._load_module("dashboard") + cls.update_base_uri() + + if cls.CEPHFS: + cls.mds_cluster.clear_firewall() + + # To avoid any issues with e.g. unlink bugs, we destroy and recreate + # the filesystem rather than just doing a rm -rf of files + cls.mds_cluster.mds_stop() + cls.mds_cluster.mds_fail() + cls.mds_cluster.delete_all_filesystems() + cls.fs = None # is now invalid! + + cls.fs = cls.mds_cluster.newfs(create=True) + cls.fs.mds_restart() + + # In case some test messed with auth caps, reset them + # pylint: disable=not-an-iterable + client_mount_ids = [m.client_id for m in cls.mounts] + for client_id in client_mount_ids: + cls.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(client_id), + 'mds', 'allow', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}'.format(cls.fs.get_data_pool_name())) + + # wait for mds restart to complete... 
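+ # (behaviour assumed from the helper's name: wait_for_daemons() should block
+ # until the recreated filesystem's MDS daemons report healthy again, so the
+ # tests below do not race against the restart)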
+ cls.fs.wait_for_daemons() + + if cls.ORCHESTRATOR: + cls._load_module("test_orchestrator") + + cmd = ['orch', 'set', 'backend', 'test_orchestrator'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd) + + cmd = ['test_orchestrator', 'load_data', '-i', '-'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin=json.dumps( + cls.ORCHESTRATOR_TEST_DATA + )) + + cls._token = None + cls._session = requests.Session() + cls._resp = None + + cls.create_user('admin', 'admin', cls.AUTH_ROLES) + if cls.AUTO_AUTHENTICATE: + cls.login('admin', 'admin') + + @classmethod + def update_base_uri(cls): + if cls._base_uri is None: + cls._base_uri = cls._get_uri("dashboard").rstrip('/') + + def setUp(self): + super(DashboardTestCase, self).setUp() + if not self._loggedin and self.AUTO_AUTHENTICATE: + self.login('admin', 'admin') + self.wait_for_health_clear(self.TIMEOUT_HEALTH_CLEAR) + + @classmethod + def tearDownClass(cls): + super(DashboardTestCase, cls).tearDownClass() + + # pylint: disable=inconsistent-return-statements, too-many-arguments, too-many-branches + @classmethod + def _request(cls, url, method, data=None, params=None, version=DEFAULT_API_VERSION, + set_cookies=False, headers=None): + url = "{}{}".format(cls._base_uri, url) + log.debug("Request %s to %s", method, url) + if headers is None: + headers = {} + cookies = {} + if cls._token: + if set_cookies: + cookies['token'] = cls._token + else: + headers['Authorization'] = "Bearer {}".format(cls._token) + if version is None: + headers['Accept'] = 'application/json' + else: + headers['Accept'] = 'application/vnd.ceph.api.v{}+json'.format(version) + + if set_cookies: + if method == 'GET': + cls._resp = cls._session.get(url, params=params, verify=False, + headers=headers, cookies=cookies) + elif method == 'POST': + cls._resp = cls._session.post(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + elif method == 'DELETE': + cls._resp = cls._session.delete(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + elif method == 'PUT': + cls._resp = cls._session.put(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + else: + assert False + else: + if method == 'GET': + cls._resp = cls._session.get(url, params=params, verify=False, + headers=headers) + elif method == 'POST': + cls._resp = cls._session.post(url, json=data, params=params, + verify=False, headers=headers) + elif method == 'DELETE': + cls._resp = cls._session.delete(url, json=data, params=params, + verify=False, headers=headers) + elif method == 'PUT': + cls._resp = cls._session.put(url, json=data, params=params, + verify=False, headers=headers) + else: + assert False + try: + if not cls._resp.ok: + # Output response for easier debugging. 
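+ # Even for a non-OK response the body is still decoded and returned below,
+ # so tests can assert on error payloads (see assertError / assertJsonBody).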
+ log.error("Request response: %s", cls._resp.text) + content_type = cls._resp.headers['content-type'] + if re.match(r'^application/.*json', + content_type) and cls._resp.text and cls._resp.text != "": + return cls._resp.json() + return cls._resp.text + except ValueError as ex: + log.exception("Failed to decode response: %s", cls._resp.text) + raise ex + + @classmethod + def _get(cls, url, params=None, version=DEFAULT_API_VERSION, set_cookies=False, headers=None): + return cls._request(url, 'GET', params=params, version=version, + set_cookies=set_cookies, headers=headers) + + @classmethod + def _view_cache_get(cls, url, retries=5): + retry = True + while retry and retries > 0: + retry = False + res = cls._get(url, version=DEFAULT_API_VERSION) + if isinstance(res, dict): + res = [res] + for view in res: + assert 'value' in view + if not view['value']: + retry = True + retries -= 1 + if retries == 0: + raise Exception("{} view cache exceeded number of retries={}" + .format(url, retries)) + return res + + @classmethod + def _post(cls, url, data=None, params=None, version=DEFAULT_API_VERSION, set_cookies=False): + cls._request(url, 'POST', data, params, version=version, set_cookies=set_cookies) + + @classmethod + def _delete(cls, url, data=None, params=None, version=DEFAULT_API_VERSION, set_cookies=False): + cls._request(url, 'DELETE', data, params, version=version, set_cookies=set_cookies) + + @classmethod + def _put(cls, url, data=None, params=None, version=DEFAULT_API_VERSION, set_cookies=False): + cls._request(url, 'PUT', data, params, version=version, set_cookies=set_cookies) + + @classmethod + def _assertEq(cls, v1, v2): + if not v1 == v2: + raise Exception("assertion failed: {} != {}".format(v1, v2)) + + @classmethod + def _assertIn(cls, v1, v2): + if v1 not in v2: + raise Exception("assertion failed: {} not in {}".format(v1, v2)) + + @classmethod + def _assertIsInst(cls, v1, v2): + if not isinstance(v1, v2): + raise Exception("assertion failed: {} not instance of {}".format(v1, v2)) + + # pylint: disable=too-many-arguments + @classmethod + def _task_request(cls, method, url, data, timeout, version=DEFAULT_API_VERSION, + set_cookies=False): + res = cls._request(url, method, data, version=version, set_cookies=set_cookies) + cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400, 403, 404]) + + if cls._resp.status_code == 403: + return None + + if cls._resp.status_code != 202: + log.debug("task finished immediately") + return res + + cls._assertIn('name', res) + cls._assertIn('metadata', res) + task_name = res['name'] + task_metadata = res['metadata'] + + retries = int(timeout) + res_task = None + while retries > 0 and not res_task: + retries -= 1 + log.debug("task (%s, %s) is still executing", task_name, task_metadata) + time.sleep(1) + _res = cls._get('/api/task?name={}'.format(task_name), version=version) + cls._assertEq(cls._resp.status_code, 200) + executing_tasks = [task for task in _res['executing_tasks'] if + task['metadata'] == task_metadata] + finished_tasks = [task for task in _res['finished_tasks'] if + task['metadata'] == task_metadata] + if not executing_tasks and finished_tasks: + res_task = finished_tasks[0] + + if retries <= 0: + raise Exception("Waiting for task ({}, {}) to finish timed out. 
{}" + .format(task_name, task_metadata, _res)) + + log.debug("task (%s, %s) finished", task_name, task_metadata) + if res_task['success']: + if method == 'POST': + cls._resp.status_code = 201 + elif method == 'PUT': + cls._resp.status_code = 200 + elif method == 'DELETE': + cls._resp.status_code = 204 + return res_task['ret_value'] + + if 'status' in res_task['exception']: + cls._resp.status_code = res_task['exception']['status'] + else: + cls._resp.status_code = 500 + return res_task['exception'] + + @classmethod + def _task_post(cls, url, data=None, timeout=60, version=DEFAULT_API_VERSION, set_cookies=False): + return cls._task_request('POST', url, data, timeout, version=version, + set_cookies=set_cookies) + + @classmethod + def _task_delete(cls, url, timeout=60, version=DEFAULT_API_VERSION, set_cookies=False): + return cls._task_request('DELETE', url, None, timeout, version=version, + set_cookies=set_cookies) + + @classmethod + def _task_put(cls, url, data=None, timeout=60, version=DEFAULT_API_VERSION, set_cookies=False): + return cls._task_request('PUT', url, data, timeout, version=version, + set_cookies=set_cookies) + + @classmethod + def cookies(cls): + return cls._resp.cookies + + @classmethod + def jsonBody(cls): + return cls._resp.json() + + @classmethod + def reset_session(cls): + cls._session = requests.Session() + + def assertSubset(self, data, biggerData): + for key, value in data.items(): + self.assertEqual(biggerData[key], value) + + def assertJsonBody(self, data): + body = self._resp.json() + self.assertEqual(body, data) + + def assertJsonSubset(self, data): + self.assertSubset(data, self._resp.json()) + + def assertSchema(self, data, schema): + try: + return _validate_json(data, schema) + except _ValError as e: + self.assertEqual(data, str(e)) + + def assertSchemaBody(self, schema): + self.assertSchema(self.jsonBody(), schema) + + def assertBody(self, body): + self.assertEqual(self._resp.text, body) + + def assertStatus(self, status): + if isinstance(status, list): + self.assertIn(self._resp.status_code, status) + else: + self.assertEqual(self._resp.status_code, status) + + def assertHeaders(self, headers): + for name, value in headers.items(): + self.assertIn(name, self._resp.headers) + self.assertEqual(self._resp.headers[name], value) + + def assertError(self, code=None, component=None, detail=None): + body = self._resp.json() + if code: + self.assertEqual(body['code'], code) + if component: + self.assertEqual(body['component'], component) + if detail: + self.assertEqual(body['detail'], detail) + + @classmethod + def _ceph_cmd(cls, cmd): + res = cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd) + log.debug("command result: %s", res) + return res + + @classmethod + def _ceph_cmd_result(cls, cmd): + exitstatus = cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd) + log.debug("command exit status: %d", exitstatus) + return exitstatus + + @classmethod + def _ceph_cmd_with_secret(cls, cmd: List[str], secret: str, return_exit_code: bool = False): + cmd.append('-i') + cmd.append('{}'.format(cls._ceph_create_tmp_file(secret))) + if return_exit_code: + return cls._ceph_cmd_result(cmd) + return cls._ceph_cmd(cmd) + + @classmethod + def _ceph_create_tmp_file(cls, content: str) -> str: + """Create a temporary file in the remote cluster""" + file_name = ''.join(random.choices(string.ascii_letters + string.digits, k=20)) + file_path = '/tmp/{}'.format(file_name) + cls._cmd(['sh', '-c', 'echo -n {} > {}'.format(content, file_path)]) + return file_path + + def set_config_key(self, 
key, value): + self._ceph_cmd(['config-key', 'set', key, value]) + + def get_config_key(self, key): + return self._ceph_cmd(['config-key', 'get', key]) + + @classmethod + def _cmd(cls, args): + return cls.mgr_cluster.admin_remote.run(args=args) + + @classmethod + def _rbd_cmd(cls, cmd): + args = ['rbd'] + args.extend(cmd) + cls._cmd(args) + + @classmethod + def _radosgw_admin_cmd(cls, cmd): + args = ['radosgw-admin'] + args.extend(cmd) + cls._cmd(args) + + @classmethod + def _rados_cmd(cls, cmd): + args = ['rados'] + args.extend(cmd) + cls._cmd(args) + + @classmethod + def mons(cls): + out = cls.ceph_cluster.mon_manager.raw_cluster_cmd('quorum_status') + j = json.loads(out) + return [mon['name'] for mon in j['monmap']['mons']] + + @classmethod + def find_object_in_list(cls, key, value, iterable): + """ + Get the first occurrence of an object within a list with + the specified key/value. + :param key: The name of the key. + :param value: The value to search for. + :param iterable: The list to process. + :return: Returns the found object or None. + """ + for obj in iterable: + if key in obj and obj[key] == value: + return obj + return None + + +# TODP: pass defaults=(False,) to namedtuple() if python3.7 +class JLeaf(namedtuple('JLeaf', ['typ', 'none'])): + def __new__(cls, typ, none=False): + return super().__new__(cls, typ, none) + + +JList = namedtuple('JList', ['elem_typ']) + +JTuple = namedtuple('JTuple', ['elem_typs']) + +JUnion = namedtuple('JUnion', ['elem_typs']) + + +class JObj(namedtuple('JObj', ['sub_elems', 'allow_unknown', 'none', 'unknown_schema'])): + def __new__(cls, sub_elems, allow_unknown=False, none=False, unknown_schema=None): + """ + :type sub_elems: dict[str, JAny | JLeaf | JList | JObj | type] + :type allow_unknown: bool + :type none: bool + :type unknown_schema: int, str, JAny | JLeaf | JList | JObj + :return: + """ + return super(JObj, cls).__new__(cls, sub_elems, allow_unknown, none, unknown_schema) + + +JAny = namedtuple('JAny', ['none']) + +module_options_object_schema = JObj({ + 'name': str, + 'type': str, + 'level': str, + 'flags': int, + 'default_value': JAny(none=True), + 'min': JAny(none=False), + 'max': JAny(none=False), + 'enum_allowed': JList(str), + 'see_also': JList(str), + 'desc': str, + 'long_desc': str, + 'tags': JList(str), +}) + +module_options_schema = JObj( + {}, + allow_unknown=True, + unknown_schema=module_options_object_schema) + +addrvec_schema = JList(JObj({ + 'addr': str, + 'nonce': int, + 'type': str +})) + +devices_schema = JList(JObj({ + 'daemons': JList(str), + 'devid': str, + 'location': JList(JObj({ + 'host': str, + 'dev': str, + 'path': str + })) +}, allow_unknown=True)) + + +class _ValError(Exception): + def __init__(self, msg, path): + path_str = ''.join('[{}]'.format(repr(p)) for p in path) + super(_ValError, self).__init__('In `input{}`: {}'.format(path_str, msg)) + + +# pylint: disable=dangerous-default-value,inconsistent-return-statements,too-many-branches +def _validate_json(val, schema, path=[]): + """ + >>> d = {'a': 1, 'b': 'x', 'c': range(10)} + ... ds = JObj({'a': int, 'b': str, 'c': JList(int)}) + ... 
_validate_json(d, ds) + True + >>> _validate_json({'num': 1}, JObj({'num': JUnion([int,float])})) + True + >>> _validate_json({'num': 'a'}, JObj({'num': JUnion([int,float])})) + False + """ + if isinstance(schema, JAny): + if not schema.none and val is None: + raise _ValError('val is None', path) + return True + if isinstance(schema, JLeaf): + if schema.none and val is None: + return True + if not isinstance(val, schema.typ): + raise _ValError('val not of type {}'.format(schema.typ), path) + return True + if isinstance(schema, JList): + if not isinstance(val, list): + raise _ValError('val="{}" is not a list'.format(val), path) + return all(_validate_json(e, schema.elem_typ, path + [i]) for i, e in enumerate(val)) + if isinstance(schema, JTuple): + return all(_validate_json(val[i], typ, path + [i]) + for i, typ in enumerate(schema.elem_typs)) + if isinstance(schema, JUnion): + for typ in schema.elem_typs: + try: + if _validate_json(val, typ, path): + return True + except _ValError: + pass + return False + if isinstance(schema, JObj): + if val is None and schema.none: + return True + if val is None: + raise _ValError('val is None', path) + if not hasattr(val, 'keys'): + raise _ValError('val="{}" is not a dict'.format(val), path) + missing_keys = set(schema.sub_elems.keys()).difference(set(val.keys())) + if missing_keys: + raise _ValError('missing keys: {}'.format(missing_keys), path) + unknown_keys = set(val.keys()).difference(set(schema.sub_elems.keys())) + if not schema.allow_unknown and unknown_keys: + raise _ValError('unknown keys: {}'.format(unknown_keys), path) + result = all( + _validate_json(val[key], sub_schema, path + [key]) + for key, sub_schema in schema.sub_elems.items() + ) + if unknown_keys and schema.allow_unknown and schema.unknown_schema: + result += all( + _validate_json(val[key], schema.unknown_schema, path + [key]) + for key in unknown_keys + ) + return result + if schema in [str, int, float, bool]: + return _validate_json(val, JLeaf(schema), path) + + assert False, str(path) diff --git a/qa/tasks/mgr/dashboard/test_api.py b/qa/tasks/mgr/dashboard/test_api.py new file mode 100644 index 000000000..22f235698 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_api.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +import unittest + +from . 
import DEFAULT_API_VERSION +from .helper import DashboardTestCase + + +class VersionReqTest(DashboardTestCase, unittest.TestCase): + def test_version(self): + for (version, expected_status) in [ + (DEFAULT_API_VERSION, 200), + (None, 415), + ("99.99", 415) + ]: + with self.subTest(version=version): + self._get('/api/summary', version=version) + self.assertStatus(expected_status) diff --git a/qa/tasks/mgr/dashboard/test_auth.py b/qa/tasks/mgr/dashboard/test_auth.py new file mode 100644 index 000000000..a2266229b --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_auth.py @@ -0,0 +1,352 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +import time + +import jwt +from teuthology.orchestra.run import \ + CommandFailedError # pylint: disable=import-error + +from .helper import DashboardTestCase, JLeaf, JObj + + +class AuthTest(DashboardTestCase): + + AUTO_AUTHENTICATE = False + + def setUp(self): + super(AuthTest, self).setUp() + self.reset_session() + + def _validate_jwt_token(self, token, username, permissions): + payload = jwt.decode(token, options={'verify_signature': False}) + self.assertIn('username', payload) + self.assertEqual(payload['username'], username) + + for scope, perms in permissions.items(): + self.assertIsNotNone(scope) + self.assertIn('read', perms) + self.assertIn('update', perms) + self.assertIn('create', perms) + self.assertIn('delete', perms) + + def test_login_without_password(self): + with self.assertRaises(CommandFailedError): + self.create_user('admin2', '', ['administrator'], force_password=True) + + def test_a_set_login_credentials(self): + # test with Authorization header + self.create_user('admin2', 'admin2', ['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin2", data['permissions']) + self.delete_user('admin2') + + # test with Cookies set + self.create_user('admin2', 'admin2', ['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin2", data['permissions']) + self.delete_user('admin2') + + def test_login_valid(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + def test_login_invalid(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + 
"detail": "Invalid credentials" + }) + + def test_lockout_user(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + def test_logout(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin", data['permissions']) + self.set_jwt_token(data['token']) + self._post("/api/auth/logout") + self.assertStatus(200) + self.assertJsonBody({ + "redirect_url": "#/login" + }) + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin", data['permissions']) + self.set_jwt_token(data['token']) + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + self.assertJsonBody({ + "redirect_url": "#/login" + }) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + + def test_token_ttl(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + time.sleep(6) + self._get("/api/host", version='1.1') + self.assertStatus(401) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + + # test with Cookies set + 
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + time.sleep(6) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + + def test_remove_from_blocklist(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call adds the token to the blocklist + self._post("/api/auth/logout") + self.assertStatus(200) + self._get("/api/host", version='1.1') + self.assertStatus(401) + time.sleep(6) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call removes expired tokens from the blocklist + self._post("/api/auth/logout") + self.assertStatus(200) + + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call adds the token to the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + time.sleep(6) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call removes expired tokens from the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + + def test_unauthorized(self): + # test with Authorization header + self._get("/api/host", version='1.1') + self.assertStatus(401) + + # test with Cookies set + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + + def test_invalidate_token_by_admin(self): + # test with Authorization header + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.create_user('user', 'user', ['read-only']) + time.sleep(1) + self._post("/api/auth", {'username': 'user', 'password': 'user'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + time.sleep(1) + self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', '--force-password', + 'user'], + 'user2') + time.sleep(1) + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'user', 'password': 'user2'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + self.delete_user("user") + + # test with Cookies set + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.create_user('user', 'user', ['read-only']) + time.sleep(1) + self._post("/api/auth", {'username': 'user', 'password': 'user'}, 
set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + time.sleep(1) + self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', '--force-password', + 'user'], + 'user2') + time.sleep(1) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'user', 'password': 'user2'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + self.delete_user("user") + + def test_check_token(self): + # test with Authorization header + self.login("admin", "admin") + self._post("/api/auth/check", {"token": self.jsonBody()["token"]}) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "username": JLeaf(str), + "permissions": JObj(sub_elems={}, allow_unknown=True), + "sso": JLeaf(bool), + "pwdUpdateRequired": JLeaf(bool) + }, allow_unknown=False)) + self.logout() + + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": self.jsonBody()["token"]}, set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "username": JLeaf(str), + "permissions": JObj(sub_elems={}, allow_unknown=True), + "sso": JLeaf(bool), + "pwdUpdateRequired": JLeaf(bool) + }, allow_unknown=False)) + self.logout(set_cookies=True) + + def test_check_wo_token(self): + # test with Authorization header + self.login("admin", "admin") + self._post("/api/auth/check", {"token": ""}) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) + }, allow_unknown=False)) + self.logout() + + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": ""}, set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) + }, allow_unknown=False)) + self.logout(set_cookies=True) diff --git a/qa/tasks/mgr/dashboard/test_cephfs.py b/qa/tasks/mgr/dashboard/test_cephfs.py new file mode 100644 index 000000000..4295b580f --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cephfs.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from contextlib import contextmanager + +from .helper import DashboardTestCase, JLeaf, JList, JObj + + +class CephfsTest(DashboardTestCase): + CEPHFS = True + + AUTH_ROLES = ['cephfs-manager'] + + QUOTA_PATH = '/quotas' + + def assertToHave(self, data, key): + self.assertIn(key, data) + self.assertIsNotNone(data[key]) + + def get_fs_id(self): + return self.fs.get_namespace_id() + + def mk_dirs(self, path, expectedStatus=200): + self._post("/api/cephfs/{}/tree".format(self.get_fs_id()), + params={'path': path}) + self.assertStatus(expectedStatus) + + def rm_dir(self, path, expectedStatus=200): + self._delete("/api/cephfs/{}/tree".format(self.get_fs_id()), + params={'path': path}) + self.assertStatus(expectedStatus) + + def get_root_directory(self, expectedStatus=200): + data = self._get("/api/cephfs/{}/get_root_directory".format(self.get_fs_id())) + self.assertStatus(expectedStatus) + self.assertIsInstance(data, dict) + return data + + def ls_dir(self, path, 
expectedLength, depth=None): + return self._ls_dir(path, expectedLength, depth, "api") + + def ui_ls_dir(self, path, expectedLength, depth=None): + return self._ls_dir(path, expectedLength, depth, "ui-api") + + def _ls_dir(self, path, expectedLength, depth, baseApiPath): + params = {'path': path} + if depth is not None: + params['depth'] = depth + data = self._get("/{}/cephfs/{}/ls_dir".format(baseApiPath, self.get_fs_id()), + params=params) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), expectedLength) + return data + + def set_quotas(self, max_bytes=None, max_files=None): + quotas = { + 'max_bytes': max_bytes, + 'max_files': max_files + } + self._put("/api/cephfs/{}/quota".format(self.get_fs_id()), data=quotas, + params={'path': self.QUOTA_PATH}) + self.assertStatus(200) + + def assert_quotas(self, max_bytes, files): + data = self.ls_dir('/', 1)[0] + self.assertEqual(data['quotas']['max_bytes'], max_bytes) + self.assertEqual(data['quotas']['max_files'], files) + + @contextmanager + def new_quota_dir(self): + self.mk_dirs(self.QUOTA_PATH) + self.set_quotas(1024 ** 3, 1024) + yield 1 + self.rm_dir(self.QUOTA_PATH) + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + fs_id = self.get_fs_id() + self._get("/api/cephfs/{}/clients".format(fs_id)) + self.assertStatus(403) + self._get("/api/cephfs/{}".format(fs_id)) + self.assertStatus(403) + self._get("/api/cephfs/{}/mds_counters".format(fs_id)) + self.assertStatus(403) + self._get("/ui-api/cephfs/{}/tabs".format(fs_id)) + self.assertStatus(403) + + def test_cephfs_clients(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/clients".format(fs_id)) + self.assertStatus(200) + + self.assertIn('status', data) + self.assertIn('data', data) + + def test_cephfs_evict_client_does_not_exist(self): + fs_id = self.get_fs_id() + self._delete("/api/cephfs/{}/client/1234".format(fs_id)) + self.assertStatus(404) + + def test_cephfs_evict_invalid_client_id(self): + fs_id = self.get_fs_id() + self._delete("/api/cephfs/{}/client/xyz".format(fs_id)) + self.assertStatus(400) + self.assertJsonBody({ + "component": 'cephfs', + "code": "invalid_cephfs_client_id", + "detail": "Invalid cephfs client ID xyz" + }) + + def test_cephfs_get(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/".format(fs_id)) + self.assertStatus(200) + + self.assertToHave(data, 'cephfs') + self.assertToHave(data, 'standbys') + self.assertToHave(data, 'versions') + + def test_cephfs_mds_counters(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/mds_counters".format(fs_id)) + self.assertStatus(200) + + self.assertIsInstance(data, dict) + self.assertIsNotNone(data) + + def test_cephfs_mds_counters_wrong(self): + self._get("/api/cephfs/baadbaad/mds_counters") + self.assertStatus(400) + self.assertJsonBody({ + "component": 'cephfs', + "code": "invalid_cephfs_id", + "detail": "Invalid cephfs ID baadbaad" + }) + + def test_cephfs_list(self): + data = self._get("/api/cephfs/") + self.assertStatus(200) + + self.assertIsInstance(data, list) + cephfs = data[0] + self.assertToHave(cephfs, 'id') + self.assertToHave(cephfs, 'mdsmap') + + def test_cephfs_get_quotas(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/quota?path=/".format(fs_id)) + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'max_bytes': int, + 'max_files': int + })) + + def test_cephfs_tabs(self): + fs_id = self.get_fs_id() + data = 
self._get("/ui-api/cephfs/{}/tabs".format(fs_id)) + self.assertStatus(200) + self.assertIsInstance(data, dict) + + # Pools + pools = data['pools'] + self.assertIsInstance(pools, list) + self.assertGreater(len(pools), 0) + for pool in pools: + self.assertEqual(pool['size'], pool['used'] + pool['avail']) + + # Ranks + self.assertToHave(data, 'ranks') + self.assertIsInstance(data['ranks'], list) + + # Name + self.assertToHave(data, 'name') + self.assertIsInstance(data['name'], str) + + # Standbys + self.assertToHave(data, 'standbys') + self.assertIsInstance(data['standbys'], str) + + # MDS counters + counters = data['mds_counters'] + self.assertIsInstance(counters, dict) + self.assertGreater(len(counters.keys()), 0) + for k, v in counters.items(): + self.assertEqual(v['name'], k) + + # Clients + self.assertToHave(data, 'clients') + clients = data['clients'] + self.assertToHave(clients, 'data') + self.assertIsInstance(clients['data'], list) + self.assertToHave(clients, 'status') + self.assertIsInstance(clients['status'], int) + + def test_ls_mk_rm_dir(self): + self.ls_dir('/', 0) + + self.mk_dirs('/pictures/birds') + self.ls_dir('/', 2, 3) + self.ls_dir('/pictures', 1) + + self.rm_dir('/pictures', 500) + self.rm_dir('/pictures/birds') + self.rm_dir('/pictures') + + self.ls_dir('/', 0) + + def test_snapshots(self): + fs_id = self.get_fs_id() + self.mk_dirs('/movies/dune/extended_version') + + self._post("/api/cephfs/{}/snapshot".format(fs_id), + params={'path': '/movies/dune', 'name': 'test'}) + self.assertStatus(200) + + data = self.ls_dir('/movies', 1) + self.assertSchema(data[0], JObj(sub_elems={ + 'name': JLeaf(str), + 'path': JLeaf(str), + 'parent': JLeaf(str), + 'snapshots': JList(JObj(sub_elems={ + 'name': JLeaf(str), + 'path': JLeaf(str), + 'created': JLeaf(str) + })), + 'quotas': JObj(sub_elems={ + 'max_bytes': JLeaf(int), + 'max_files': JLeaf(int) + }) + })) + snapshots = data[0]['snapshots'] + self.assertEqual(len(snapshots), 1) + snapshot = snapshots[0] + self.assertEqual(snapshot['name'], "test") + self.assertEqual(snapshot['path'], "/movies/dune/.snap/test") + + # Should have filtered out "_test_$timestamp" + data = self.ls_dir('/movies/dune', 1) + snapshots = data[0]['snapshots'] + self.assertEqual(len(snapshots), 0) + + self._delete("/api/cephfs/{}/snapshot".format(fs_id), + params={'path': '/movies/dune', 'name': 'test'}) + self.assertStatus(200) + + data = self.ls_dir('/movies', 1) + self.assertEqual(len(data[0]['snapshots']), 0) + + # Cleanup. Note, the CephFS Python extension (and therefor the Dashboard + # REST API) does not support recursive deletion of a directory. 
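+ # The tree is therefore removed leaf-first, deepest directory first.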
+ self.rm_dir('/movies/dune/extended_version') + self.rm_dir('/movies/dune') + self.rm_dir('/movies') + + def test_quotas_default(self): + self.mk_dirs(self.QUOTA_PATH) + self.assert_quotas(0, 0) + self.rm_dir(self.QUOTA_PATH) + + def test_quotas_set_both(self): + with self.new_quota_dir(): + self.assert_quotas(1024 ** 3, 1024) + + def test_quotas_set_only_bytes(self): + with self.new_quota_dir(): + self.set_quotas(2048 ** 3) + self.assert_quotas(2048 ** 3, 1024) + + def test_quotas_set_only_files(self): + with self.new_quota_dir(): + self.set_quotas(None, 2048) + self.assert_quotas(1024 ** 3, 2048) + + def test_quotas_unset_both(self): + with self.new_quota_dir(): + self.set_quotas(0, 0) + self.assert_quotas(0, 0) + + def test_listing_of_root_dir(self): + self.ls_dir('/', 0) # Should not list root + ui_root = self.ui_ls_dir('/', 1)[0] # Should list root by default + root = self.get_root_directory() + self.assertEqual(ui_root, root) + + def test_listing_of_ui_api_ls_on_deeper_levels(self): + # The UI-API and API ls_dir methods should behave the same way on deeper levels + self.mk_dirs('/pictures') + api_ls = self.ls_dir('/pictures', 0) + ui_api_ls = self.ui_ls_dir('/pictures', 0) + self.assertEqual(api_ls, ui_api_ls) + self.rm_dir('/pictures') diff --git a/qa/tasks/mgr/dashboard/test_cluster.py b/qa/tasks/mgr/dashboard/test_cluster.py new file mode 100644 index 000000000..14f854279 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cluster.py @@ -0,0 +1,23 @@ +from .helper import DashboardTestCase, JLeaf, JObj + + +class ClusterTest(DashboardTestCase): + + def setUp(self): + super().setUp() + self.reset_session() + + def test_get_status(self): + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + "status": JLeaf(str) + }, allow_unknown=False)) + + def test_update_status(self): + req = {'status': 'POST_INSTALLED'} + self._put('/api/cluster', req, version='0.1') + self.assertStatus(200) + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertEqual(data, req) diff --git a/qa/tasks/mgr/dashboard/test_cluster_configuration.py b/qa/tasks/mgr/dashboard/test_cluster_configuration.py new file mode 100644 index 000000000..9c8245d23 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cluster_configuration.py @@ -0,0 +1,398 @@ +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class ClusterConfigurationTest(DashboardTestCase): + + def test_list(self): + data = self._get('/api/cluster_conf') + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertGreater(len(data), 1000) + for conf in data: + self._validate_single(conf) + + def test_get(self): + data = self._get('/api/cluster_conf/admin_socket') + self.assertStatus(200) + self._validate_single(data) + self.assertIn('enum_values', data) + + data = self._get('/api/cluster_conf/fantasy_name') + self.assertStatus(404) + + def test_get_specific_db_config_option(self): + config_name = 'mon_allow_pool_delete' + + orig_value = self._get_config_by_name(config_name) + + self._ceph_cmd(['config', 'set', 'mon', config_name, 'true']) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [{'section': 'mon', 'value': 'true'}], + timeout=30, + period=1) + + self._ceph_cmd(['config', 'set', 'mon', config_name, 'false']) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [{'section': 'mon', 'value': 'false'}], + timeout=30, + period=1) + + # restore value + if orig_value: + 
self._ceph_cmd(['config', 'set', 'mon', config_name, orig_value[0]['value']]) + + def test_filter_config_options(self): + config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'osd_scrub_end_hour'] + data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names))) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), 3) + for conf in data: + self._validate_single(conf) + self.assertIn(conf['name'], config_names) + + def test_filter_config_options_empty_names(self): + self._get('/api/cluster_conf/filter?names=') + self.assertStatus(404) + self.assertEqual(self._resp.json()['detail'], 'Config options `` not found') + + def test_filter_config_options_unknown_name(self): + self._get('/api/cluster_conf/filter?names=abc') + self.assertStatus(404) + self.assertEqual(self._resp.json()['detail'], 'Config options `abc` not found') + + def test_filter_config_options_contains_unknown_name(self): + config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'abc'] + data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names))) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), 2) + for conf in data: + self._validate_single(conf) + self.assertIn(conf['name'], config_names) + + def test_create(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': '0/3'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_delete(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # set a config option + expected_result = [{'section': 'mon', 'value': '0/3'}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # delete it and check if it's deleted + self._delete('/api/cluster_conf/{}?section={}'.format(config_name, 'mon')) + self.assertStatus(204) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + None, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_cant_update_at_runtime(self): + config_name = 'public_bind_addr' # not updatable + config_value = [{'section': 'global', 'value': 'true'}] + orig_value = self._get_config_by_name(config_name) + + # try to set config option and check if it fails + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': config_value + }) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + config_name)) + + # check if config option value is still the original one + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + orig_value, + timeout=30, + period=1) + + def 
test_create_two_values(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': '0/3'}, + {'section': 'osd', 'value': '0/5'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_can_handle_none_values(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': [{'section': 'mon', 'value': '0/3'}, + {'section': 'osd', 'value': None}] + }) + self.assertStatus(201) + + expected_result = [{'section': 'mon', 'value': '0/3'}] + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_can_handle_boolean_values(self): + config_name = 'mon_allow_pool_delete' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': 'true'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': [{'section': 'mon', 'value': True}]}) + self.assertStatus(201) + + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_bulk_set(self): + expected_result = { + 'osd_max_backfills': {'section': 'osd', 'value': '1'}, + 'osd_recovery_max_active': {'section': 'osd', 'value': '3'}, + 'osd_recovery_max_single_start': {'section': 'osd', 'value': '1'}, + 'osd_recovery_sleep': {'section': 'osd', 'value': '2.000000'} + } + orig_values = dict() + + for config_name in expected_result: + orig_values[config_name] = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + self._put('/api/cluster_conf', {'options': expected_result}) + self.assertStatus(200) + + for config_name, value in expected_result.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [value], + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_values[config_name]) + + def test_bulk_set_cant_update_at_runtime(self): + config_options = { + 'public_bind_addr': {'section': 'global', 'value': '1.2.3.4:567'}, # not updatable + 'public_network': {'section': 'global', 'value': '10.0.0.0/8'} # not updatable + } + orig_values = dict() + + for config_name in config_options: + orig_values[config_name] = self._get_config_by_name(config_name) + + # try to set config options and see if it fails + 
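+ # Both options are flagged as not updatable at runtime, so the whole bulk
+ # PUT is expected to be rejected with a 400 and the original values
+ # (re-checked below) must remain unchanged.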
self._put('/api/cluster_conf', {'options': config_options}) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + ', '.join(config_options.keys()))) + + # check if config option values are still the original ones + for config_name, value in orig_values.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + value, + timeout=30, + period=1) + + def test_bulk_set_cant_update_at_runtime_partial(self): + config_options = { + 'public_bind_addr': {'section': 'global', 'value': 'true'}, # not updatable + 'log_to_stderr': {'section': 'global', 'value': 'true'} # updatable + } + orig_values = dict() + + for config_name in config_options: + orig_values[config_name] = self._get_config_by_name(config_name) + + # try to set config options and see if it fails + self._put('/api/cluster_conf', {'options': config_options}) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + 'public_bind_addr')) + + # check if config option values are still the original ones + for config_name, value in orig_values.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + value, + timeout=30, + period=1) + + def test_check_existence(self): + """ + This test case is intended to check the existence of all hard coded config options used by + the dashboard. + If you include further hard coded options in the dashboard, feel free to add them to the + list. + """ + hard_coded_options = [ + 'osd_max_backfills', # osd-recv-speed + 'osd_recovery_max_active', # osd-recv-speed + 'osd_recovery_max_single_start', # osd-recv-speed + 'osd_recovery_sleep', # osd-recv-speed + 'osd_scrub_during_recovery', # osd-pg-scrub + 'osd_scrub_begin_hour', # osd-pg-scrub + 'osd_scrub_end_hour', # osd-pg-scrub + 'osd_scrub_begin_week_day', # osd-pg-scrub + 'osd_scrub_end_week_day', # osd-pg-scrub + 'osd_scrub_min_interval', # osd-pg-scrub + 'osd_scrub_max_interval', # osd-pg-scrub + 'osd_deep_scrub_interval', # osd-pg-scrub + 'osd_scrub_auto_repair', # osd-pg-scrub + 'osd_max_scrubs', # osd-pg-scrub + 'osd_scrub_priority', # osd-pg-scrub + 'osd_scrub_sleep', # osd-pg-scrub + 'osd_scrub_auto_repair_num_errors', # osd-pg-scrub + 'osd_debug_deep_scrub_sleep', # osd-pg-scrub + 'osd_deep_scrub_keys', # osd-pg-scrub + 'osd_deep_scrub_large_omap_object_key_threshold', # osd-pg-scrub + 'osd_deep_scrub_large_omap_object_value_sum_threshold', # osd-pg-scrub + 'osd_deep_scrub_randomize_ratio', # osd-pg-scrub + 'osd_deep_scrub_stride', # osd-pg-scrub + 'osd_deep_scrub_update_digest_min_age', # osd-pg-scrub + 'osd_requested_scrub_priority', # osd-pg-scrub + 'osd_scrub_backoff_ratio', # osd-pg-scrub + 'osd_scrub_chunk_max', # osd-pg-scrub + 'osd_scrub_chunk_min', # osd-pg-scrub + 'osd_scrub_cost', # osd-pg-scrub + 'osd_scrub_interval_randomize_ratio', # osd-pg-scrub + 'osd_scrub_invalid_stats', # osd-pg-scrub + 'osd_scrub_load_threshold', # osd-pg-scrub + 'osd_scrub_max_preemptions', # osd-pg-scrub + 'mon_allow_pool_delete' # pool-list + ] + + for config_option in hard_coded_options: + self._get('/api/cluster_conf/{}'.format(config_option)) + self.assertStatus(200) + + def _validate_single(self, data): + self.assertIn('name', data) + self.assertIn('daemon_default', data) + self.assertIn('long_desc', data) + self.assertIn('level', 
data) + self.assertIn('default', data) + self.assertIn('see_also', data) + self.assertIn('tags', data) + self.assertIn('min', data) + self.assertIn('max', data) + self.assertIn('services', data) + self.assertIn('type', data) + self.assertIn('desc', data) + self.assertIn(data['type'], ['str', 'bool', 'float', 'int', 'size', 'uint', 'addr', + 'addrvec', 'uuid', 'secs', 'millisecs']) + + if 'value' in data: + self.assertIn('source', data) + self.assertIsInstance(data['value'], list) + + for entry in data['value']: + self.assertIsInstance(entry, dict) + self.assertIn('section', entry) + self.assertIn('value', entry) + + def _get_config_by_name(self, conf_name): + data = self._get('/api/cluster_conf/{}'.format(conf_name)) + if 'value' in data: + return data['value'] + return None + + def _clear_all_values_for_config_option(self, config_name): + values = self._get_config_by_name(config_name) + if values: + for value in values: + self._ceph_cmd(['config', 'rm', value['section'], config_name]) + + def _reset_original_values(self, config_name, orig_values): + if orig_values: + for value in orig_values: + self._ceph_cmd(['config', 'set', value['section'], config_name, value['value']]) diff --git a/qa/tasks/mgr/dashboard/test_crush_rule.py b/qa/tasks/mgr/dashboard/test_crush_rule.py new file mode 100644 index 000000000..aa2250b1d --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_crush_rule.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj + + +class CrushRuleTest(DashboardTestCase): + + AUTH_ROLES = ['pool-manager'] + + rule_schema = JObj(sub_elems={ + 'rule_id': int, + 'rule_name': str, + 'steps': JList(JObj({}, allow_unknown=True)) + }, allow_unknown=True) + + def create_and_delete_rule(self, data): + name = data['name'] + # Creates rule + self._post('/api/crush_rule', data) + self.assertStatus(201) + # Makes sure rule exists + rule = self._get('/api/crush_rule/{}'.format(name), version='2.0') + self.assertStatus(200) + self.assertSchemaBody(self.rule_schema) + self.assertEqual(rule['rule_name'], name) + # Deletes rule + self._delete('/api/crush_rule/{}'.format(name)) + self.assertStatus(204) + + @DashboardTestCase.RunAs('test', 'test', ['rgw-manager']) + def test_read_access_permissions(self): + self._get('/api/crush_rule', version='2.0') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', ['read-only']) + def test_write_access_permissions(self): + self._get('/api/crush_rule', version='2.0') + self.assertStatus(200) + data = {'name': 'some_rule', 'root': 'default', 'failure_domain': 'osd'} + self._post('/api/crush_rule', data) + self.assertStatus(403) + self._delete('/api/crush_rule/default') + self.assertStatus(403) + + @classmethod + def tearDownClass(cls): + super(CrushRuleTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'crush', 'rule', 'rm', 'some_rule']) + cls._ceph_cmd(['osd', 'crush', 'rule', 'rm', 'another_rule']) + + def test_list(self): + self._get('/api/crush_rule', version='2.0') + self.assertStatus(200) + self.assertSchemaBody(JList(self.rule_schema)) + + def test_create(self): + self.create_and_delete_rule({ + 'name': 'some_rule', + 'root': 'default', + 'failure_domain': 'osd' + }) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager', 'cluster-manager']) + def test_create_with_ssd(self): + data = self._get('/api/osd/0') + self.assertStatus(200) + device_class = data['osd_metadata']['default_device_class'] + self.create_and_delete_rule({ + 'name': 'another_rule', + 
'root': 'default', + 'failure_domain': 'osd', + 'device_class': device_class + }) + + def test_crush_rule_info(self): + self._get('/ui-api/crush_rule/info') + self.assertStatus(200) + self.assertSchemaBody(JObj({ + 'names': JList(str), + 'nodes': JList(JObj({}, allow_unknown=True)), + 'roots': JList(int) + })) diff --git a/qa/tasks/mgr/dashboard/test_erasure_code_profile.py b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py new file mode 100644 index 000000000..7fb7c1c82 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj + + +class ECPTest(DashboardTestCase): + + AUTH_ROLES = ['pool-manager'] + + @DashboardTestCase.RunAs('test', 'test', ['rgw-manager']) + def test_read_access_permissions(self): + self._get('/api/erasure_code_profile') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', ['read-only']) + def test_write_access_permissions(self): + self._get('/api/erasure_code_profile') + self.assertStatus(200) + data = {'name': 'ecp32', 'k': 3, 'm': 2} + self._post('/api/erasure_code_profile', data) + self.assertStatus(403) + self._delete('/api/erasure_code_profile/default') + self.assertStatus(403) + + @classmethod + def tearDownClass(cls): + super(ECPTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecp32']) + cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'lrc']) + + def test_list(self): + data = self._get('/api/erasure_code_profile') + self.assertStatus(200) + + default = [p for p in data if p['name'] == 'default'] + if default: + default_ecp = { + 'k': 2, + 'technique': 'reed_sol_van', + 'm': 1, + 'name': 'default', + 'plugin': 'jerasure' + } + if 'crush-failure-domain' in default[0]: + default_ecp['crush-failure-domain'] = default[0]['crush-failure-domain'] + self.assertSubset(default_ecp, default[0]) + get_data = self._get('/api/erasure_code_profile/default') + self.assertEqual(get_data, default[0]) + + def test_create(self): + data = {'name': 'ecp32', 'k': 3, 'm': 2} + self._post('/api/erasure_code_profile', data) + self.assertStatus(201) + + self._get('/api/erasure_code_profile/ecp32') + self.assertJsonSubset({ + 'crush-device-class': '', + 'crush-failure-domain': 'osd', + 'crush-root': 'default', + 'jerasure-per-chunk-alignment': 'false', + 'k': 3, + 'm': 2, + 'name': 'ecp32', + 'plugin': 'jerasure', + 'technique': 'reed_sol_van', + }) + + self.assertStatus(200) + + self._delete('/api/erasure_code_profile/ecp32') + self.assertStatus(204) + + def test_create_plugin(self): + data = {'name': 'lrc', 'k': '2', 'm': '2', 'l': '2', 'plugin': 'lrc'} + self._post('/api/erasure_code_profile', data) + self.assertJsonBody(None) + self.assertStatus(201) + + self._get('/api/erasure_code_profile/lrc') + self.assertJsonBody({ + 'crush-device-class': '', + 'crush-failure-domain': 'host', + 'crush-root': 'default', + 'k': 2, + 'l': '2', + 'm': 2, + 'name': 'lrc', + 'plugin': 'lrc' + }) + + self.assertStatus(200) + + self._delete('/api/erasure_code_profile/lrc') + self.assertStatus(204) + + def test_ecp_info(self): + self._get('/ui-api/erasure_code_profile/info') + self.assertSchemaBody(JObj({ + 'names': JList(str), + 'plugins': JList(str), + 'directory': str, + 'nodes': JList(JObj({}, allow_unknown=True)) + })) diff --git a/qa/tasks/mgr/dashboard/test_feedback.py b/qa/tasks/mgr/dashboard/test_feedback.py new file mode 100644 index 000000000..0ec5ac318 --- /dev/null +++ 
b/qa/tasks/mgr/dashboard/test_feedback.py @@ -0,0 +1,36 @@ +import time + +from .helper import DashboardTestCase + + +class FeedbackTest(DashboardTestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls._ceph_cmd(['mgr', 'module', 'enable', 'feedback']) + time.sleep(10) + + def test_create_api_key(self): + self._post('/api/feedback/api_key', {'api_key': 'testapikey'}, version='0.1') + self.assertStatus(201) + + def test_get_api_key(self): + response = self._get('/api/feedback/api_key', version='0.1') + self.assertStatus(200) + self.assertEqual(response, 'testapikey') + + def test_remove_api_key(self): + self._delete('/api/feedback/api_key', version='0.1') + self.assertStatus(204) + + def test_issue_tracker_create_with_invalid_key(self): + self._post('/api/feedback', {'api_key': 'invalidapikey', 'description': 'test', + 'project': 'dashboard', 'subject': 'sub', 'tracker': 'bug'}, + version='0.1') + self.assertStatus(400) + + def test_issue_tracker_create_with_invalid_params(self): + self._post('/api/feedback', {'api_key': '', 'description': 'test', 'project': 'xyz', + 'subject': 'testsub', 'tracker': 'invalid'}, version='0.1') + self.assertStatus(400) diff --git a/qa/tasks/mgr/dashboard/test_health.py b/qa/tasks/mgr/dashboard/test_health.py new file mode 100644 index 000000000..b6ffade4c --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_health.py @@ -0,0 +1,309 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import (DashboardTestCase, JAny, JLeaf, JList, JObj, + addrvec_schema, module_options_schema) + + +class HealthTest(DashboardTestCase): + CEPHFS = True + + __pg_info_schema = JObj({ + 'object_stats': JObj({ + 'num_objects': int, + 'num_object_copies': int, + 'num_objects_degraded': int, + 'num_objects_misplaced': int, + 'num_objects_unfound': int + }), + 'pgs_per_osd': float, + 'statuses': JObj({}, allow_unknown=True, unknown_schema=int) + }) + + __mdsmap_schema = JObj({ + 'session_autoclose': int, + 'balancer': str, + 'bal_rank_mask': str, + 'up': JObj({}, allow_unknown=True), + 'last_failure_osd_epoch': int, + 'in': JList(int), + 'last_failure': int, + 'max_file_size': int, + 'explicitly_allowed_features': int, + 'damaged': JList(int), + 'tableserver': int, + 'failed': JList(int), + 'metadata_pool': int, + 'epoch': int, + 'stopped': JList(int), + 'max_mds': int, + 'compat': JObj({ + 'compat': JObj({}, allow_unknown=True), + 'ro_compat': JObj({}, allow_unknown=True), + 'incompat': JObj({}, allow_unknown=True) + }), + 'required_client_features': JObj({}, allow_unknown=True), + 'data_pools': JList(int), + 'info': JObj({}, allow_unknown=True), + 'fs_name': str, + 'created': str, + 'standby_count_wanted': int, + 'enabled': bool, + 'modified': str, + 'session_timeout': int, + 'flags': int, + 'flags_state': JObj({ + 'joinable': bool, + 'allow_snaps': bool, + 'allow_multimds_snaps': bool, + 'allow_standby_replay': bool, + 'refuse_client_session': bool + }), + 'ever_allowed_features': int, + 'root': int + }) + + def test_minimal_health(self): + data = self._get('/api/health/minimal') + self.assertStatus(200) + schema = JObj({ + 'client_perf': JObj({ + 'read_bytes_sec': int, + 'read_op_per_sec': int, + 'recovering_bytes_per_sec': int, + 'write_bytes_sec': int, + 'write_op_per_sec': int + }), + 'df': JObj({ + 'stats': JObj({ + 'total_avail_bytes': int, + 'total_bytes': int, + 'total_used_raw_bytes': int, + }) + }), + 'fs_map': JObj({ + 'filesystems': JList( + JObj({ + 'mdsmap': self.__mdsmap_schema + }), + ), + 'standbys': JList(JObj({}, 
allow_unknown=True)), + }), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str, + }), + 'hosts': int, + 'iscsi_daemons': JObj({ + 'up': int, + 'down': int + }), + 'mgr_map': JObj({ + 'active_name': str, + 'standbys': JList(JLeaf(dict)) + }), + 'mon_status': JObj({ + 'monmap': JObj({ + 'mons': JList(JLeaf(dict)), + }), + 'quorum': JList(int) + }), + 'osd_map': JObj({ + 'osds': JList( + JObj({ + 'in': int, + 'up': int, + 'state': JList(str) + })), + }), + 'pg_info': self.__pg_info_schema, + 'pools': JList(JLeaf(dict)), + 'rgw': int, + 'scrub_status': str + }) + self.assertSchema(data, schema) + + def test_full_health(self): + data = self._get('/api/health/full') + self.assertStatus(200) + module_info_schema = JObj({ + 'can_run': bool, + 'error_string': str, + 'name': str, + 'module_options': module_options_schema + }) + schema = JObj({ + 'client_perf': JObj({ + 'read_bytes_sec': int, + 'read_op_per_sec': int, + 'recovering_bytes_per_sec': int, + 'write_bytes_sec': int, + 'write_op_per_sec': int + }), + 'df': JObj({ + 'pools': JList(JObj({ + 'stats': JObj({ + 'stored': int, + 'stored_data': int, + 'stored_omap': int, + 'objects': int, + 'kb_used': int, + 'bytes_used': int, + 'data_bytes_used': int, + 'omap_bytes_used': int, + 'percent_used': float, + 'max_avail': int, + 'quota_objects': int, + 'quota_bytes': int, + 'dirty': int, + 'rd': int, + 'rd_bytes': int, + 'wr': int, + 'wr_bytes': int, + 'compress_bytes_used': int, + 'compress_under_bytes': int, + 'stored_raw': int, + 'avail_raw': int + }), + 'name': str, + 'id': int + })), + 'stats': JObj({ + 'total_avail_bytes': int, + 'total_bytes': int, + 'total_used_bytes': int, + 'total_used_raw_bytes': int, + 'total_used_raw_ratio': float, + 'num_osds': int, + 'num_per_pool_osds': int, + 'num_per_pool_omap_osds': int + }) + }), + 'fs_map': JObj({ + 'compat': JObj({ + 'compat': JObj({}, allow_unknown=True, unknown_schema=str), + 'incompat': JObj( + {}, allow_unknown=True, unknown_schema=str), + 'ro_compat': JObj( + {}, allow_unknown=True, unknown_schema=str) + }), + 'default_fscid': int, + 'epoch': int, + 'feature_flags': JObj( + {}, allow_unknown=True, unknown_schema=bool), + 'filesystems': JList( + JObj({ + 'id': int, + 'mdsmap': self.__mdsmap_schema + }), + ), + 'standbys': JList(JObj({}, allow_unknown=True)), + }), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str, + }), + 'hosts': int, + 'iscsi_daemons': JObj({ + 'up': int, + 'down': int + }), + 'mgr_map': JObj({ + 'active_addr': str, + 'active_addrs': JObj({ + 'addrvec': addrvec_schema + }), + 'active_change': str, # timestamp + 'active_mgr_features': int, + 'active_gid': int, + 'active_name': str, + 'always_on_modules': JObj({}, allow_unknown=True), + 'available': bool, + 'available_modules': JList(module_info_schema), + 'epoch': int, + 'modules': JList(str), + 'services': JObj( + {'dashboard': str}, # This module should always be present + allow_unknown=True, unknown_schema=str + ), + 'standbys': JList(JObj({ + 'available_modules': JList(module_info_schema), + 'gid': int, + 'name': str, + 'mgr_features': int + }, allow_unknown=True)) + }, allow_unknown=True), + 'mon_status': JObj({ + 'election_epoch': int, + 'extra_probe_peers': JList(JAny(none=True)), + 'feature_map': JObj( + {}, allow_unknown=True, unknown_schema=JList(JObj({ + 'features': str, + 'num': int, + 'release': str + })) + ), + 'features': JObj({ + 'quorum_con': str, 
+ 'quorum_mon': JList(str), + 'required_con': str, + 'required_mon': JList(str) + }), + 'monmap': JObj({ + # @TODO: expand on monmap schema + 'mons': JList(JLeaf(dict)), + }, allow_unknown=True), + 'name': str, + 'outside_quorum': JList(int), + 'quorum': JList(int), + 'quorum_age': int, + 'rank': int, + 'state': str, + # @TODO: What type should be expected here? + 'sync_provider': JList(JAny(none=True)), + 'stretch_mode': bool + }), + 'osd_map': JObj({ + # @TODO: define schema for crush map and osd_metadata, among + # others + 'osds': JList( + JObj({ + 'in': int, + 'up': int, + }, allow_unknown=True)), + }, allow_unknown=True), + 'pg_info': self.__pg_info_schema, + 'pools': JList(JLeaf(dict)), + 'rgw': int, + 'scrub_status': str + }) + self.assertSchema(data, schema) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data['pools'])) + for pool in data['pools']: + self.assertIn(pool['pool_name'], cluster_pools) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_health_permissions(self): + data = self._get('/api/health/full') + self.assertStatus(200) + + schema = JObj({ + 'client_perf': JObj({}, allow_unknown=True), + 'df': JObj({}, allow_unknown=True), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str + }), + 'pools': JList(JLeaf(dict)), + }) + self.assertSchema(data, schema) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data['pools'])) + for pool in data['pools']: + self.assertIn(pool['pool_name'], cluster_pools) diff --git a/qa/tasks/mgr/dashboard/test_host.py b/qa/tasks/mgr/dashboard/test_host.py new file mode 100644 index 000000000..78d784473 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_host.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj, devices_schema + + +class HostControllerTest(DashboardTestCase): + + AUTH_ROLES = ['read-only'] + + URL_HOST = '/api/host' + URL_UI_HOST = '/ui-api/host' + + ORCHESTRATOR = True + + @classmethod + def setUpClass(cls): + super(HostControllerTest, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + cmd = ['test_orchestrator', 'load_data', '-i', '-'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}') + + @property + def test_data_inventory(self): + return self.ORCHESTRATOR_TEST_DATA['inventory'] + + @property + def test_data_daemons(self): + return self.ORCHESTRATOR_TEST_DATA['daemons'] + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get(self.URL_HOST, version='1.1') + self.assertStatus(403) + + def test_host_list(self): + data = self._get(self.URL_HOST, version='1.1') + self.assertStatus(200) + + orch_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + + for server in data: + self.assertIn('services', server) + self.assertIn('hostname', server) + self.assertIn('ceph_version', server) + self.assertIsNotNone(server['hostname']) + self.assertIsNotNone(server['ceph_version']) + for service in server['services']: + self.assertIn('type', service) + self.assertIn('id', service) + self.assertIsNotNone(service['type']) + self.assertIsNotNone(service['id']) + + self.assertIn('sources', server) + in_ceph, in_orchestrator = server['sources']['ceph'], server['sources']['orchestrator'] + if in_ceph: + 
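+ # a host reported by Ceph itself should list at least one running service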
self.assertGreaterEqual(len(server['services']), 1) + if not in_orchestrator: + self.assertNotIn(server['hostname'], orch_hostnames) + if in_orchestrator: + self.assertEqual(len(server['services']), 0) + self.assertIn(server['hostname'], orch_hostnames) + + def test_host_list_with_sources(self): + data = self._get('{}?sources=orchestrator'.format(self.URL_HOST), version='1.1') + self.assertStatus(200) + test_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + resp_hostnames = {host['hostname'] for host in data} + self.assertEqual(test_hostnames, resp_hostnames) + + data = self._get('{}?sources=ceph'.format(self.URL_HOST), version='1.1') + self.assertStatus(200) + test_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + resp_hostnames = {host['hostname'] for host in data} + self.assertEqual(len(test_hostnames.intersection(resp_hostnames)), 0) + + def test_host_devices(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + data = self._get('{}/devices'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + self.assertSchema(data, devices_schema) + + def test_host_daemons(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + data = self._get('{}/daemons'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + self.assertSchema(data, JList(JObj({ + 'hostname': str, + 'daemon_id': str, + 'daemon_type': str + }))) + + def test_host_smart(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + self._get('{}/smart'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + + def _validate_inventory(self, data, resp_data): + self.assertEqual(data['name'], resp_data['name']) + self.assertEqual(len(data['devices']), len(resp_data['devices'])) + + if not data['devices']: + return + test_devices = sorted(data['devices'], key=lambda d: d['path']) + resp_devices = sorted(resp_data['devices'], key=lambda d: d['path']) + + for test, resp in zip(test_devices, resp_devices): + self._validate_device(test, resp) + + def _validate_device(self, data, resp_data): + for key, value in data.items(): + self.assertEqual(value, resp_data[key]) + + def test_inventory_get(self): + # get a inventory + node = self.test_data_inventory[0] + resp = self._get('{}/{}/inventory'.format(self.URL_HOST, node['name'])) + self.assertStatus(200) + self._validate_inventory(node, resp) + + def test_inventory_list(self): + # get all inventory + data = self._get('{}/inventory'.format(self.URL_UI_HOST)) + self.assertStatus(200) + + def sorting_key(node): + return node['name'] + + test_inventory = sorted(self.test_data_inventory, key=sorting_key) + resp_inventory = sorted(data, key=sorting_key) + self.assertEqual(len(test_inventory), len(resp_inventory)) + for test, resp in zip(test_inventory, resp_inventory): + self._validate_inventory(test, resp) + + +class HostControllerNoOrchestratorTest(DashboardTestCase): + def test_host_create(self): + self._post('/api/host?hostname=foo', {'status': ''}, version='0.1') + self.assertStatus(503) + self.assertError(code='orchestrator_status_unavailable', + component='orchestrator') + + def test_host_delete(self): + 
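+ # without a configured orchestrator backend the delete request must fail with 503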
self._delete('/api/host/bar') + self.assertStatus(503) + self.assertError(code='orchestrator_status_unavailable', + component='orchestrator') diff --git a/qa/tasks/mgr/dashboard/test_logs.py b/qa/tasks/mgr/dashboard/test_logs.py new file mode 100644 index 000000000..63f6e16ed --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_logs.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj, addrvec_schema + + +class LogsTest(DashboardTestCase): + CEPHFS = True + + def test_logs(self): + data = self._get("/api/logs/all") + self.assertStatus(200) + log_entry_schema = JList(JObj({ + 'addrs': JObj({ + 'addrvec': addrvec_schema + }), + 'channel': str, + 'message': str, + 'name': str, + 'priority': str, + 'rank': str, + 'seq': int, + 'stamp': str + })) + schema = JObj({ + 'audit_log': log_entry_schema, + 'clog': log_entry_schema + }) + self.assertSchema(data, schema) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_log_perms(self): + self._get("/api/logs/all") + self.assertStatus(403) diff --git a/qa/tasks/mgr/dashboard/test_mgr_module.py b/qa/tasks/mgr/dashboard/test_mgr_module.py new file mode 100644 index 000000000..c196c7124 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_mgr_module.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import logging + +import requests + +from .helper import (DashboardTestCase, JLeaf, JList, JObj, + module_options_object_schema, module_options_schema) + +logger = logging.getLogger(__name__) + + +class MgrModuleTestCase(DashboardTestCase): + MGRS_REQUIRED = 1 + + def wait_until_rest_api_accessible(self): + """ + Wait until the REST API is accessible. + """ + + def _check_connection(): + try: + # Try reaching an API endpoint successfully. 
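+ # a 200 response from the module list endpoint means the dashboard REST API is serving requests again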
+ self._get('/api/mgr/module') + if self._resp.status_code == 200: + return True + except requests.ConnectionError: + pass + return False + + self.wait_until_true(_check_connection, timeout=30) + + +class MgrModuleTest(MgrModuleTestCase): + + def test_list_disabled_module(self): + self._ceph_cmd(['mgr', 'module', 'disable', 'iostat']) + self.wait_until_rest_api_accessible() + data = self._get('/api/mgr/module') + self.assertStatus(200) + self.assertSchema( + data, + JList( + JObj(sub_elems={ + 'name': JLeaf(str), + 'enabled': JLeaf(bool), + 'always_on': JLeaf(bool), + 'options': module_options_schema + }))) + module_info = self.find_object_in_list('name', 'iostat', data) + self.assertIsNotNone(module_info) + self.assertFalse(module_info['enabled']) + + def test_list_enabled_module(self): + self._ceph_cmd(['mgr', 'module', 'enable', 'iostat']) + self.wait_until_rest_api_accessible() + data = self._get('/api/mgr/module') + self.assertStatus(200) + self.assertSchema( + data, + JList( + JObj(sub_elems={ + 'name': JLeaf(str), + 'enabled': JLeaf(bool), + 'always_on': JLeaf(bool), + 'options': module_options_schema + }))) + module_info = self.find_object_in_list('name', 'iostat', data) + self.assertIsNotNone(module_info) + self.assertTrue(module_info['enabled']) + + def test_get(self): + data = self._get('/api/mgr/module/telemetry') + self.assertStatus(200) + self.assertSchema( + data, + JObj( + allow_unknown=True, + sub_elems={ + 'channel_basic': bool, + 'channel_ident': bool, + 'channel_crash': bool, + 'channel_device': bool, + 'channel_perf': bool, + 'contact': str, + 'description': str, + 'enabled': bool, + 'interval': int, + 'last_opt_revision': int, + 'leaderboard': bool, + 'leaderboard_description': str, + 'organization': str, + 'proxy': str, + 'url': str + })) + + def test_module_options(self): + data = self._get('/api/mgr/module/telemetry/options') + self.assertStatus(200) + schema = JObj({ + 'channel_basic': module_options_object_schema, + 'channel_crash': module_options_object_schema, + 'channel_device': module_options_object_schema, + 'channel_ident': module_options_object_schema, + 'channel_perf': module_options_object_schema, + 'contact': module_options_object_schema, + 'description': module_options_object_schema, + 'device_url': module_options_object_schema, + 'enabled': module_options_object_schema, + 'interval': module_options_object_schema, + 'last_opt_revision': module_options_object_schema, + 'leaderboard': module_options_object_schema, + 'leaderboard_description': module_options_object_schema, + 'log_level': module_options_object_schema, + 'log_to_cluster': module_options_object_schema, + 'log_to_cluster_level': module_options_object_schema, + 'log_to_file': module_options_object_schema, + 'organization': module_options_object_schema, + 'proxy': module_options_object_schema, + 'url': module_options_object_schema + }) + self.assertSchema(data, schema) + + def test_module_enable(self): + self._post('/api/mgr/module/telemetry/enable') + self.assertStatus(200) + + def test_disable(self): + self._post('/api/mgr/module/iostat/disable') + self.assertStatus(200) + + def test_put(self): + self.set_config_key('config/mgr/mgr/iostat/log_level', 'critical') + self.set_config_key('config/mgr/mgr/iostat/log_to_cluster', 'False') + self.set_config_key('config/mgr/mgr/iostat/log_to_cluster_level', 'info') + self.set_config_key('config/mgr/mgr/iostat/log_to_file', 'True') + self._put( + '/api/mgr/module/iostat', + data={ + 'config': { + 'log_level': 'debug', + 'log_to_cluster': True, + 
'log_to_cluster_level': 'warning', + 'log_to_file': False + } + }) + self.assertStatus(200) + data = self._get('/api/mgr/module/iostat') + self.assertStatus(200) + self.assertEqual(data['log_level'], 'debug') + self.assertTrue(data['log_to_cluster']) + self.assertEqual(data['log_to_cluster_level'], 'warning') + self.assertFalse(data['log_to_file']) diff --git a/qa/tasks/mgr/dashboard/test_monitor.py b/qa/tasks/mgr/dashboard/test_monitor.py new file mode 100644 index 000000000..e32c2c10c --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_monitor.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class MonitorTest(DashboardTestCase): + AUTH_ROLES = ['cluster-manager'] + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get('/api/monitor') + self.assertStatus(403) + + def test_monitor_default(self): + data = self._get("/api/monitor") + self.assertStatus(200) + + self.assertIn('mon_status', data) + self.assertIn('in_quorum', data) + self.assertIn('out_quorum', data) + self.assertIsNotNone(data['mon_status']) + self.assertIsNotNone(data['in_quorum']) + self.assertIsNotNone(data['out_quorum']) diff --git a/qa/tasks/mgr/dashboard/test_motd.py b/qa/tasks/mgr/dashboard/test_motd.py new file mode 100644 index 000000000..2edbf36ba --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_motd.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +import time + +from .helper import DashboardTestCase + + +class MotdTest(DashboardTestCase): + @classmethod + def tearDownClass(cls): + cls._ceph_cmd(['dashboard', 'motd', 'clear']) + super(MotdTest, cls).tearDownClass() + + def setUp(self): + super(MotdTest, self).setUp() + self._ceph_cmd(['dashboard', 'motd', 'clear']) + + def test_none(self): + data = self._get('/ui-api/motd') + self.assertStatus(200) + self.assertIsNone(data) + + def test_set(self): + self._ceph_cmd(['dashboard', 'motd', 'set', 'info', '0', 'foo bar baz']) + data = self._get('/ui-api/motd') + self.assertStatus(200) + self.assertIsInstance(data, dict) + + def test_expired(self): + self._ceph_cmd(['dashboard', 'motd', 'set', 'info', '2s', 'foo bar baz']) + time.sleep(5) + data = self._get('/ui-api/motd') + self.assertStatus(200) + self.assertIsNone(data) diff --git a/qa/tasks/mgr/dashboard/test_orchestrator.py b/qa/tasks/mgr/dashboard/test_orchestrator.py new file mode 100644 index 000000000..2a804c4c2 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_orchestrator.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class OrchestratorControllerTest(DashboardTestCase): + + AUTH_ROLES = ['cluster-manager'] + + URL_STATUS = '/ui-api/orchestrator/status' + + ORCHESTRATOR = True + + @classmethod + def setUpClass(cls): + super(OrchestratorControllerTest, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + cmd = ['test_orchestrator', 'load_data', '-i', '-'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}') + + def test_status_get(self): + data = self._get(self.URL_STATUS) + self.assertStatus(200) + self.assertTrue(data['available']) diff --git a/qa/tasks/mgr/dashboard/test_osd.py b/qa/tasks/mgr/dashboard/test_osd.py new file mode 100644 index 000000000..71cf3d871 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_osd.py @@ -0,0 +1,368 @@ +# -*- coding: utf-8 -*- + +from __future__ import 
absolute_import + +import json + +from .helper import (DashboardTestCase, JAny, JLeaf, JList, JObj, JTuple, + devices_schema) + + +class OsdTest(DashboardTestCase): + + AUTH_ROLES = ['cluster-manager'] + + @classmethod + def setUpClass(cls): + super(OsdTest, cls).setUpClass() + cls._load_module('test_orchestrator') + cmd = ['orch', 'set', 'backend', 'test_orchestrator'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd) + + def tearDown(self): + self._put('/api/osd/0/mark', data={'action': 'in'}) + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get('/api/osd') + self.assertStatus(403) + self._get('/api/osd/0') + self.assertStatus(403) + + def assert_in_and_not_none(self, data, properties): + self.assertSchema(data, JObj({p: JAny(none=False) for p in properties}, allow_unknown=True)) + + def test_list(self): + data = self._get('/api/osd') + self.assertStatus(200) + + self.assertGreaterEqual(len(data), 1) + data = data[0] + self.assert_in_and_not_none(data, ['host', 'tree', 'state', 'stats', 'stats_history']) + self.assert_in_and_not_none(data['host'], ['name']) + self.assert_in_and_not_none(data['tree'], ['id']) + self.assert_in_and_not_none(data['stats'], ['numpg', 'stat_bytes_used', 'stat_bytes', + 'op_r', 'op_w']) + self.assert_in_and_not_none(data['stats_history'], ['op_out_bytes', 'op_in_bytes']) + self.assertSchema(data['stats_history']['op_out_bytes'], + JList(JTuple([JLeaf(float), JLeaf(float)]))) + + def test_details(self): + data = self._get('/api/osd/0') + self.assertStatus(200) + self.assert_in_and_not_none(data, ['osd_metadata']) + + def test_histogram(self): + data = self._get('/api/osd/0/histogram') + self.assertStatus(200) + self.assert_in_and_not_none(data['osd'], ['op_w_latency_in_bytes_histogram', + 'op_r_latency_out_bytes_histogram']) + + def test_scrub(self): + self._post('/api/osd/0/scrub?deep=False') + self.assertStatus(200) + + self._post('/api/osd/0/scrub?deep=True') + self.assertStatus(200) + + def test_safe_to_delete(self): + data = self._get('/api/osd/safe_to_delete?svc_ids=0') + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'is_safe_to_delete': JAny(none=True), + 'message': str + })) + self.assertTrue(data['is_safe_to_delete']) + + def test_osd_smart(self): + self._get('/api/osd/0/smart') + self.assertStatus(200) + + def test_mark_out_and_in(self): + self._put('/api/osd/0/mark', data={'action': 'out'}) + self.assertStatus(200) + + self._put('/api/osd/0/mark', data={'action': 'in'}) + self.assertStatus(200) + + def test_mark_down(self): + self._put('/api/osd/0/mark', data={'action': 'down'}) + self.assertStatus(200) + + def test_reweight(self): + self._post('/api/osd/0/reweight', {'weight': 0.4}) + self.assertStatus(200) + + def get_reweight_value(): + self._get('/api/osd/0') + response = self.jsonBody() + if 'osd_map' in response and 'weight' in response['osd_map']: + return round(response['osd_map']['weight'], 1) + return None + self.wait_until_equal(get_reweight_value, 0.4, 10) + self.assertStatus(200) + + # Undo + self._post('/api/osd/0/reweight', {'weight': 1}) + + def test_create_lost_destroy_remove(self): + sample_data = { + 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f', + 'svc_id': 5 + } + + # Create + self._task_post('/api/osd', { + 'method': 'bare', + 'data': sample_data, + 'tracking_id': 'bare-5' + }) + self.assertStatus(201) + + # invalid method + self._task_post('/api/osd', { + 'method': 'xyz', + 'data': { + 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f', + 'svc_id': 5 + }, + 
'tracking_id': 'bare-5' + }) + self.assertStatus(400) + + # Lost + self._put('/api/osd/5/mark', data={'action': 'lost'}) + self.assertStatus(200) + # Destroy + self._post('/api/osd/5/destroy') + self.assertStatus(200) + # Purge + self._post('/api/osd/5/purge') + self.assertStatus(200) + + def test_create_with_drive_group(self): + data = { + 'method': 'drive_groups', + 'data': [ + { + 'service_type': 'osd', + 'service_id': 'test', + 'host_pattern': '*', + 'data_devices': { + 'vendor': 'abc', + 'model': 'cba', + 'rotational': True, + 'size': '4 TB' + }, + 'wal_devices': { + 'vendor': 'def', + 'model': 'fed', + 'rotational': False, + 'size': '1 TB' + }, + 'db_devices': { + 'vendor': 'ghi', + 'model': 'ihg', + 'rotational': False, + 'size': '512 GB' + }, + 'wal_slots': 5, + 'db_slots': 5, + 'encrypted': True + } + ], + 'tracking_id': 'test' + } + self._post('/api/osd', data) + self.assertStatus(201) + + def test_safe_to_destroy(self): + osd_dump = json.loads(self._ceph_cmd(['osd', 'dump', '-f', 'json'])) + max_id = max(map(lambda e: e['osd'], osd_dump['osds'])) + + def get_pg_status_equal_unknown(osd_ids): + self._get('/api/osd/safe_to_destroy?ids={}'.format(osd_ids)) + if 'message' in self.jsonBody(): + return 'pgs have unknown state' in self.jsonBody()['message'] + return False + + # 1 OSD safe to destroy + unused_osd_id = max_id + 10 + self.wait_until_equal( + lambda: get_pg_status_equal_unknown(unused_osd_id), False, 30) + self.assertStatus(200) + self.assertJsonBody({ + 'is_safe_to_destroy': True, + 'active': [], + 'missing_stats': [], + 'safe_to_destroy': [unused_osd_id], + 'stored_pgs': [], + }) + + # multiple OSDs safe to destroy + unused_osd_ids = [max_id + 11, max_id + 12] + self.wait_until_equal( + lambda: get_pg_status_equal_unknown(str(unused_osd_ids)), False, 30) + self.assertStatus(200) + self.assertJsonBody({ + 'is_safe_to_destroy': True, + 'active': [], + 'missing_stats': [], + 'safe_to_destroy': unused_osd_ids, + 'stored_pgs': [], + }) + + # 1 OSD unsafe to destroy + def get_destroy_status(): + self._get('/api/osd/safe_to_destroy?ids=0') + if 'is_safe_to_destroy' in self.jsonBody(): + return self.jsonBody()['is_safe_to_destroy'] + return None + self.wait_until_equal(get_destroy_status, False, 10) + self.assertStatus(200) + + def test_osd_devices(self): + data = self._get('/api/osd/0/devices') + self.assertStatus(200) + self.assertSchema(data, devices_schema) + + +class OsdFlagsTest(DashboardTestCase): + def __init__(self, *args, **kwargs): + super(OsdFlagsTest, self).__init__(*args, **kwargs) + self._initial_flags = ['sortbitwise', 'recovery_deletes', 'purged_snapdirs', + 'pglog_hardlimit'] # These flags cannot be unset + + @classmethod + def _put_flags(cls, flags, ids=None): + url = '/api/osd/flags' + data = {'flags': flags} + + if ids: + url = url + '/individual' + data['ids'] = ids + + cls._put(url, data=data) + return cls._resp.json() + + def test_list_osd_flags(self): + flags = self._get('/api/osd/flags') + self.assertStatus(200) + self.assertEqual(len(flags), 4) + self.assertCountEqual(flags, self._initial_flags) + + def test_add_osd_flag(self): + flags = self._put_flags([ + 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', + 'pause', 'pglog_hardlimit' + ]) + self.assertCountEqual(flags, [ + 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', + 'pause', 'pglog_hardlimit' + ]) + + # Restore flags + self._put_flags(self._initial_flags) + + def test_get_indiv_flag(self): + initial = self._get('/api/osd/flags/individual') + self.assertStatus(200) + 
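+ # each entry pairs an OSD id with the list of flags currently set on that OSD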
self.assertSchema(initial, JList(JObj({ + 'osd': int, + 'flags': JList(str) + }))) + + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_added = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_added: + if osd['osd'] in [0, 1, 2]: + self.assertIn('noout', osd['flags']) + self.assertIn('noin', osd['flags']) + for osd_initial in initial: + if osd['osd'] == osd_initial['osd']: + self.assertGreater(len(osd['flags']), len(osd_initial['flags'])) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_removed = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_removed: + if osd['osd'] in [0, 1, 2]: + self.assertNotIn('noout', osd['flags']) + self.assertNotIn('noin', osd['flags']) + + def test_add_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout'], [], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], ['noout'], ['noup', 'nodown', 'noin']) + + self._ceph_cmd(['osd', 'unset-group', 'noout', 'osd.{}'.format(svc_id)]) + + def test_add_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + def test_add_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = [0, 1, 2] + + resp = self._put_flags(flags_update, svc_id) + self._check_indiv_flags_resp(resp, svc_id, ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + + def test_remove_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], [], ['noout'], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], [], ['noup', 'nodown', 'noin', 'noout']) + + def test_remove_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], [], ['noout', 'noin'], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown']) + + def test_remove_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = [0, 1, 2] + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + + resp = self._put_flags(flags_update, svc_id) + self._check_indiv_flags_resp(resp, svc_id, [], ['noout', 'noin'], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown']) + + def _check_indiv_flags_resp(self, resp, ids, added, removed, ignored): + 
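+ # the response must list exactly the expected ids, added and removed flags; ignored flags must appear in neither list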
self.assertStatus(200) + self.assertCountEqual(resp['ids'], ids) + self.assertCountEqual(resp['added'], added) + self.assertCountEqual(resp['removed'], removed) + + for flag in ignored: + self.assertNotIn(flag, resp['added']) + self.assertNotIn(flag, resp['removed']) + + def _check_indiv_flags_osd(self, ids, activated_flags, deactivated_flags): + osds = json.loads(self._ceph_cmd(['osd', 'dump', '--format=json']))['osds'] + for osd in osds: + if osd['osd'] in ids: + for flag in activated_flags: + self.assertIn(flag, osd['state']) + for flag in deactivated_flags: + self.assertNotIn(flag, osd['state']) diff --git a/qa/tasks/mgr/dashboard/test_perf_counters.py b/qa/tasks/mgr/dashboard/test_perf_counters.py new file mode 100644 index 000000000..c01368bce --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_perf_counters.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase, JObj + + +class PerfCountersControllerTest(DashboardTestCase): + + def test_perf_counters_list(self): + data = self._get('/api/perf_counters') + self.assertStatus(200) + + self.assertIsInstance(data, dict) + for mon in self.mons(): + self.assertIn('mon.{}'.format(mon), data) + + osds = self.ceph_cluster.mon_manager.get_osd_dump() + for osd in osds: + self.assertIn('osd.{}'.format(osd['osd']), data) + + def _validate_perf(self, srv_id, srv_type, data, allow_empty): + self.assertIsInstance(data, dict) + self.assertEqual(srv_type, data['service']['type']) + self.assertEqual(str(srv_id), data['service']['id']) + self.assertIsInstance(data['counters'], list) + if not allow_empty: + self.assertGreater(len(data['counters']), 0) + for counter in data['counters'][0:1]: + self.assertIsInstance(counter, dict) + self.assertIn('description', counter) + self.assertIn('name', counter) + self.assertIn('unit', counter) + self.assertIn('value', counter) + + def test_perf_counters_mon_get(self): + mon = self.mons()[0] + data = self._get('/api/perf_counters/mon/{}'.format(mon)) + self.assertStatus(200) + self._validate_perf(mon, 'mon', data, allow_empty=False) + + def test_perf_counters_mgr_get(self): + mgr = list(self.mgr_cluster.mgr_ids)[0] + data = self._get('/api/perf_counters/mgr/{}'.format(mgr)) + self.assertStatus(200) + self._validate_perf(mgr, 'mgr', data, allow_empty=False) + + def test_perf_counters_mds_get(self): + for mds in self.mds_cluster.mds_ids: + data = self._get('/api/perf_counters/mds/{}'.format(mds)) + self.assertStatus(200) + self._validate_perf(mds, 'mds', data, allow_empty=True) + + def test_perf_counters_osd_get(self): + for osd in self.ceph_cluster.mon_manager.get_osd_dump(): + osd = osd['osd'] + data = self._get('/api/perf_counters/osd/{}'.format(osd)) + self.assertStatus(200) + self._validate_perf(osd, 'osd', data, allow_empty=False) + + def test_perf_counters_not_found(self): + osds = self.ceph_cluster.mon_manager.get_osd_dump() + unused_id = int(list(map(lambda o: o['osd'], osds)).pop()) + 1 + + self._get('/api/perf_counters/osd/{}'.format(unused_id)) + self.assertStatus(404) + schema = JObj(sub_elems={ + 'status': str, + 'detail': str, + }, allow_unknown=True) + self.assertEqual(self._resp.json()['detail'], "'osd.{}' not found".format(unused_id)) + self.assertSchemaBody(schema) diff --git a/qa/tasks/mgr/dashboard/test_pool.py b/qa/tasks/mgr/dashboard/test_pool.py new file mode 100644 index 000000000..0699be48c --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_pool.py @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import 
+ +import logging +import time +from contextlib import contextmanager + +from .helper import DashboardTestCase, JAny, JList, JObj, JUnion + +log = logging.getLogger(__name__) + + +class PoolTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager'] + + pool_schema = JObj(sub_elems={ + 'pool_name': str, + 'type': str, + 'application_metadata': JList(str), + 'flags': int, + 'flags_names': str, + }, allow_unknown=True) + + pool_list_stat_schema = JObj(sub_elems={ + 'latest': JUnion([int, float]), + 'rate': float, + 'rates': JList(JAny(none=False)), + }) + + pool_list_stats_schema = JObj(sub_elems={ + 'avail_raw': pool_list_stat_schema, + 'bytes_used': pool_list_stat_schema, + 'max_avail': pool_list_stat_schema, + 'percent_used': pool_list_stat_schema, + 'rd_bytes': pool_list_stat_schema, + 'wr_bytes': pool_list_stat_schema, + 'rd': pool_list_stat_schema, + 'wr': pool_list_stat_schema, + }, allow_unknown=True) + + pool_rbd_conf_schema = JList(JObj(sub_elems={ + 'name': str, + 'value': str, + 'source': int + })) + + @contextmanager + def __yield_pool(self, name=None, data=None, deletion_name=None): + """ + Use either just a name or whole description of a pool to create one. + This also validates the correct creation and deletion after the pool was used. + + :param name: Name of the pool + :param data: Describes the pool in full length + :param deletion_name: Only needed if the pool was renamed + :return: + """ + data = self._create_pool(name, data) + yield data + self._delete_pool(deletion_name or data['pool']) + + def _create_pool(self, name, data): + data = data or { + 'pool': name, + 'pg_num': '32', + 'pool_type': 'replicated', + 'compression_algorithm': 'snappy', + 'compression_mode': 'passive', + 'compression_max_blob_size': '131072', + 'compression_required_ratio': '0.875', + 'application_metadata': ['rbd'], + 'configuration': { + 'rbd_qos_bps_limit': 1024000, + 'rbd_qos_iops_limit': 5000, + } + } + self._task_post('/api/pool/', data) + self.assertStatus(201) + self._validate_pool_properties(data, self._get_pool(data['pool'])) + return data + + def _delete_pool(self, name): + self._task_delete('/api/pool/' + name) + self.assertStatus(204) + + def _validate_pool_properties(self, data, pool, timeout=DashboardTestCase.TIMEOUT_HEALTH_CLEAR): + # pylint: disable=too-many-branches + for prop, value in data.items(): + if prop == 'pool_type': + self.assertEqual(pool['type'], value) + elif prop == 'size': + self.assertEqual(pool[prop], int(value), + '{}: {} != {}'.format(prop, pool[prop], value)) + elif prop == 'pg_num': + self._check_pg_num(pool['pool_name'], int(value)) + elif prop == 'application_metadata': + self.assertIsInstance(pool[prop], list) + self.assertEqual(value, pool[prop]) + elif prop == 'pool': + self.assertEqual(pool['pool_name'], value) + elif prop.startswith('compression'): + if value is not None: + if prop.endswith('size'): + value = int(value) + elif prop.endswith('ratio'): + value = float(value) + self.assertEqual(pool['options'][prop], value) + else: + self.assertEqual(pool['options'], {}) + elif prop == 'configuration': + # configuration cannot really be checked here for two reasons: + # 1. The default value cannot be given to this method, which becomes relevant + # when resetting a value, because it's not always zero. + # 2. 
The expected `source` cannot be given to this method, and it cannot + # reliably be determined (see 1) + pass + else: + self.assertEqual(pool[prop], value, '{}: {} != {}'.format(prop, pool[prop], value)) + + self.wait_for_health_clear(timeout) + + def _get_pool(self, pool_name): + pool = self._get("/api/pool/" + pool_name) + self.assertStatus(200) + self.assertSchemaBody(self.pool_schema) + return pool + + def _check_pg_num(self, pool_name, pg_num): + """ + If both properties have not the same value, the cluster goes into a warning state, which + will only happen during a pg update on an existing pool. The test that does that is + currently commented out because our QA systems can't deal with the change. Feel free to test + it locally. + """ + self.wait_until_equal( + lambda: self._get_pool(pool_name)['pg_placement_num'], + expect_val=pg_num, + timeout=180 + ) + + pool = self._get_pool(pool_name) + + for prop in ['pg_num', 'pg_placement_num']: + self.assertEqual(pool[prop], int(pg_num), + '{}: {} != {}'.format(prop, pool[prop], pg_num)) + + @DashboardTestCase.RunAs('test', 'test', [{'pool': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self._get('/api/pool') + self.assertStatus(403) + self._get('/api/pool/bla') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'pool': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self._task_post('/api/pool/', {}) + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'pool': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self._delete('/api/pool/ddd') + self.assertStatus(403) + + def test_pool_configuration(self): + pool_name = '.mgr' + data = self._get('/api/pool/{}/configuration'.format(pool_name)) + self.assertStatus(200) + self.assertSchema(data, JList(JObj({ + 'name': str, + 'value': str, + 'source': int + }))) + + def test_pool_list(self): + data = self._get("/api/pool") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + self.assertSchemaBody(JList(self.pool_schema)) + for pool in data: + self.assertNotIn('pg_status', pool) + self.assertNotIn('stats', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_list_attrs(self): + data = self._get("/api/pool?attrs=type,flags") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + for pool in data: + self.assertIn('pool_name', pool) + self.assertIn('type', pool) + self.assertIn('flags', pool) + self.assertNotIn('flags_names', pool) + self.assertNotIn('pg_status', pool) + self.assertNotIn('stats', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_list_stats(self): + data = self._get("/api/pool?stats=true") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + self.assertSchemaBody(JList(self.pool_schema)) + for pool in data: + self.assertIn('pool_name', pool) + self.assertIn('type', pool) + self.assertIn('application_metadata', pool) + self.assertIn('flags', pool) + self.assertIn('pg_status', pool) + self.assertSchema(pool['stats'], self.pool_list_stats_schema) + self.assertIn('flags_names', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_get(self): + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + pool = 
self._get("/api/pool/{}?stats=true&attrs=type,flags,stats" + .format(cluster_pools[0])) + self.assertEqual(pool['pool_name'], cluster_pools[0]) + self.assertIn('type', pool) + self.assertIn('flags', pool) + self.assertNotIn('pg_status', pool) + self.assertSchema(pool['stats'], self.pool_list_stats_schema) + self.assertNotIn('flags_names', pool) + self.assertSchema(pool['configuration'], self.pool_rbd_conf_schema) + + def test_pool_create_with_two_applications(self): + self.__yield_pool(None, { + 'pool': 'dashboard_pool1', + 'pg_num': '32', + 'pool_type': 'replicated', + 'application_metadata': ['rbd', 'sth'], + }) + + def test_pool_create_with_ecp_and_rule(self): + self._ceph_cmd(['osd', 'crush', 'rule', 'create-erasure', 'ecrule']) + self._ceph_cmd( + ['osd', 'erasure-code-profile', 'set', 'ecprofile', 'crush-failure-domain=osd']) + self.__yield_pool(None, { + 'pool': 'dashboard_pool2', + 'pg_num': '32', + 'pool_type': 'erasure', + 'application_metadata': ['rbd'], + 'erasure_code_profile': 'ecprofile', + 'crush_rule': 'ecrule', + }) + self._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecprofile']) + + def test_pool_create_with_compression(self): + pool = { + 'pool': 'dashboard_pool3', + 'pg_num': '32', + 'pool_type': 'replicated', + 'compression_algorithm': 'zstd', + 'compression_mode': 'aggressive', + 'compression_max_blob_size': '10000000', + 'compression_required_ratio': '0.8', + 'application_metadata': ['rbd'], + 'configuration': { + 'rbd_qos_bps_limit': 2048, + 'rbd_qos_iops_limit': None, + }, + } + with self.__yield_pool(None, pool): + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '2048', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': '0', + }] + new_pool = self._get_pool(pool['pool']) + for conf in expected_configuration: + self.assertIn(conf, new_pool['configuration']) + + def test_pool_create_with_quotas(self): + pools = [ + { + 'pool_data': { + 'pool': 'dashboard_pool_quota1', + 'pg_num': '32', + 'pool_type': 'replicated', + 'application_metadata': ['rbd'], + }, + 'pool_quotas_to_check': { + 'quota_max_objects': 0, + 'quota_max_bytes': 0, + } + }, + { + 'pool_data': { + 'pool': 'dashboard_pool_quota2', + 'pg_num': '32', + 'pool_type': 'replicated', + 'application_metadata': ['rbd'], + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + }, + 'pool_quotas_to_check': { + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + } + } + ] + + for pool in pools: + pool_name = pool['pool_data']['pool'] + with self.__yield_pool(pool_name, pool['pool_data']): + self._validate_pool_properties(pool['pool_quotas_to_check'], + self._get_pool(pool_name)) + + def test_pool_update_name(self): + name = 'pool_update' + updated_name = 'pool_updated_name' + with self.__yield_pool(name, None, updated_name): + props = {'pool': updated_name} + self._task_put('/api/pool/{}'.format(name), props) + time.sleep(5) + self.assertStatus(200) + self._validate_pool_properties(props, self._get_pool(updated_name)) + + def test_pool_update_metadata(self): + pool_name = 'pool_update_metadata' + with self.__yield_pool(pool_name): + props = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/{}'.format(pool_name), props) + self._validate_pool_properties(props, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + 
properties = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + def test_pool_update_configuration(self): + pool_name = 'pool_update_configuration' + with self.__yield_pool(pool_name): + configuration = { + 'rbd_qos_bps_limit': 1024, + 'rbd_qos_iops_limit': None, + } + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '1024', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': '0', + }] + self._task_put('/api/pool/' + pool_name, {'configuration': configuration}) + time.sleep(5) + pool_config = self._get_pool(pool_name)['configuration'] + for conf in expected_configuration: + self.assertIn(conf, pool_config) + + def test_pool_update_compression(self): + pool_name = 'pool_update_compression' + with self.__yield_pool(pool_name): + properties = { + 'compression_algorithm': 'zstd', + 'compression_mode': 'aggressive', + 'compression_max_blob_size': '10000000', + 'compression_required_ratio': '0.8', + } + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + def test_pool_update_unset_compression(self): + pool_name = 'pool_update_unset_compression' + with self.__yield_pool(pool_name): + self._task_put('/api/pool/' + pool_name, {'compression_mode': 'unset'}) + time.sleep(5) + self._validate_pool_properties({ + 'compression_algorithm': None, + 'compression_mode': None, + 'compression_max_blob_size': None, + 'compression_required_ratio': None, + }, self._get_pool(pool_name)) + + def test_pool_update_quotas(self): + pool_name = 'pool_update_quotas' + with self.__yield_pool(pool_name): + properties = { + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + } + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + def test_pool_create_fail(self): + data = {'pool_type': u'replicated', 'rule_name': u'dnf', 'pg_num': u'8', 'pool': u'sadfs'} + self._task_post('/api/pool/', data) + self.assertStatus(400) + self.assertJsonBody({ + 'component': 'pool', + 'code': "2", + 'detail': "[errno -2] specified rule dnf doesn't exist" + }) + + def test_pool_info(self): + self._get("/ui-api/pool/info") + self.assertSchemaBody(JObj({ + 'pool_names': JList(str), + 'compression_algorithms': JList(str), + 'compression_modes': JList(str), + 'is_all_bluestore': bool, + 'bluestore_compression_algorithm': str, + 'osd_count': int, + 'crush_rules_replicated': JList(JObj({}, allow_unknown=True)), + 'crush_rules_erasure': JList(JObj({}, allow_unknown=True)), + 'pg_autoscale_default_mode': str, + 'pg_autoscale_modes': JList(str), + 'erasure_code_profiles': JList(JObj({}, allow_unknown=True)), + 'used_rules': JObj({}, allow_unknown=True), + 'used_profiles': JObj({}, allow_unknown=True), + 'nodes': JList(JObj({}, allow_unknown=True)), + })) diff --git a/qa/tasks/mgr/dashboard/test_rbd.py b/qa/tasks/mgr/dashboard/test_rbd.py new file mode 100644 index 000000000..c2ffbd48e --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rbd.py @@ -0,0 +1,978 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import 
absolute_import + +import time + +from .helper import DashboardTestCase, JLeaf, JList, JObj + + +class RbdTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager', 'block-manager', 'cluster-manager'] + LIST_VERSION = '2.0' + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self._get('/api/block/image?offset=0&limit=-1&search=&sort=+name', + version=RbdTest.LIST_VERSION) + self.assertStatus(403) + self.get_image('pool', None, 'image') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self.create_image('pool', None, 'name', 0) + self.assertStatus(403) + self.create_snapshot('pool', None, 'image', 'snapshot', False) + self.assertStatus(403) + self.copy_image('src_pool', None, 'src_image', 'dest_pool', None, 'dest_image') + self.assertStatus(403) + self.clone_image('parent_pool', None, 'parent_image', 'parent_snap', 'pool', None, 'name') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'delete']}]) + def test_update_access_permissions(self): + self.edit_image('pool', None, 'image') + self.assertStatus(403) + self.update_snapshot('pool', None, 'image', 'snapshot', None, None) + self.assertStatus(403) + self.rollback_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus(403) + self.flatten_image('pool', None, 'image') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self.remove_image('pool', None, 'image') + self.assertStatus(403) + self.remove_snapshot('pool', None, 'image', 'snapshot') + self.assertStatus(403) + + @classmethod + def create_namespace(cls, pool, namespace): + data = {'namespace': namespace} + return cls._post('/api/block/pool/{}/namespace'.format(pool), data) + + @classmethod + def remove_namespace(cls, pool, namespace): + return cls._delete('/api/block/pool/{}/namespace/{}'.format(pool, namespace)) + + @classmethod + def create_image(cls, pool, namespace, name, size, **kwargs): + data = {'name': name, 'pool_name': pool, 'namespace': namespace, 'size': size} + data.update(kwargs) + return cls._task_post('/api/block/image', data) + + @classmethod + def get_image(cls, pool, namespace, name): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._get('/api/block/image/{}%2F{}{}'.format(pool, namespace, name)) + + @classmethod + def clone_image(cls, parent_pool, parent_namespace, parent_image, parent_snap, pool, namespace, + name, **kwargs): + # pylint: disable=too-many-arguments + data = {'child_image_name': name, 'child_namespace': namespace, 'child_pool_name': pool} + data.update(kwargs) + parent_namespace = '{}%2F'.format(parent_namespace) if parent_namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap/{}/clone' + .format(parent_pool, parent_namespace, parent_image, parent_snap), + data) + + @classmethod + def copy_image(cls, src_pool, src_namespace, src_image, dest_pool, dest_namespace, dest_image, + **kwargs): + # pylint: disable=too-many-arguments + data = {'dest_image_name': dest_image, + 'dest_pool_name': dest_pool, + 'dest_namespace': dest_namespace} + data.update(kwargs) + src_namespace = '{}%2F'.format(src_namespace) if src_namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/copy' + .format(src_pool, src_namespace, src_image), data) + + 
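+ # Note: the helpers in this class address an image as 'pool[/namespace]/image' and + # URL-encode the '/' separators as %2F when building request paths. For example + # (hypothetical values), get_image('rbd', 'ns', 'img1') issues GET /api/block/image/rbd%2Fns%2Fimg1. +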
@classmethod + def remove_image(cls, pool, namespace, image): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_delete('/api/block/image/{}%2F{}{}'.format(pool, namespace, image)) + + # pylint: disable=too-many-arguments + @classmethod + def edit_image(cls, pool, namespace, image, name=None, size=None, features=None, **kwargs): + kwargs.update({'name': name, 'size': size, 'features': features}) + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_put('/api/block/image/{}%2F{}{}'.format(pool, namespace, image), kwargs) + + @classmethod + def flatten_image(cls, pool, namespace, image): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/flatten'.format(pool, namespace, image)) + + @classmethod + def create_snapshot(cls, pool, namespace, image, snapshot, mirrorImageSnapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap'.format(pool, namespace, image), + {'snapshot_name': snapshot, 'mirrorImageSnapshot': mirrorImageSnapshot}) # noqa E501 #pylint: disable=line-too-long + + @classmethod + def remove_snapshot(cls, pool, namespace, image, snapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_delete('/api/block/image/{}%2F{}{}/snap/{}'.format(pool, namespace, image, + snapshot)) + + @classmethod + def update_snapshot(cls, pool, namespace, image, snapshot, new_name, is_protected): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_put('/api/block/image/{}%2F{}{}/snap/{}'.format(pool, namespace, image, + snapshot), + {'new_snap_name': new_name, 'is_protected': is_protected}) + + @classmethod + def rollback_snapshot(cls, pool, namespace, image, snapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap/{}/rollback'.format(pool, + namespace, + image, + snapshot)) + + @classmethod + def setUpClass(cls): + super(RbdTest, cls).setUpClass() + cls.create_pool('rbd', 2**3, 'replicated') + cls.create_pool('rbd_iscsi', 2**3, 'replicated') + + cls.create_image('rbd', None, 'img1', 2**30) + cls.create_image('rbd', None, 'img2', 2*2**30) + cls.create_image('rbd_iscsi', None, 'img1', 2**30) + cls.create_image('rbd_iscsi', None, 'img2', 2*2**30) + + osd_metadata = cls.ceph_cluster.mon_manager.get_osd_metadata() + cls.bluestore_support = True + for osd in osd_metadata: + if osd['osd_objectstore'] != 'bluestore': + cls.bluestore_support = False + break + + @classmethod + def tearDownClass(cls): + super(RbdTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it']) + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_iscsi', 'rbd_iscsi', + '--yes-i-really-really-mean-it']) + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_data', 'rbd_data', + '--yes-i-really-really-mean-it']) + + def create_image_in_trash(self, pool, name, delay=0): + self.create_image(pool, None, name, 10240) + img = self._get('/api/block/image/{}%2F{}'.format(pool, name)) + + self._task_post("/api/block/image/{}%2F{}/move_trash".format(pool, name), + {'delay': delay}) + self.assertStatus([200, 201]) + return img['id'] + + @classmethod + def remove_trash(cls, pool, image_id, force=False): + return cls._task_delete('/api/block/image/trash/{}%2F{}/?force={}'.format( + pool, image_id, force)) + + @classmethod + def restore_trash(cls, pool, namespace, image_id, new_image_name): + data 
= {'new_image_name': new_image_name} + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/trash/{}%2F{}{}/restore'.format(pool, + namespace, + image_id), data) + + @classmethod + def purge_trash(cls, pool): + return cls._task_post('/api/block/image/trash/purge?pool_name={}'.format(pool)) + + @classmethod + def get_trash(cls, pool, image_id): + trash = cls._get('/api/block/image/trash/?pool_name={}'.format(pool)) + if isinstance(trash, list): + for trash_pool in trash: + for image in trash_pool['value']: + if image['id'] == image_id: + return image + + return None + + def _validate_image(self, img, **kwargs): + """ + Example of an RBD image json: + + { + "size": 1073741824, + "obj_size": 4194304, + "mirror_mode": "journal", + "num_objs": 256, + "order": 22, + "block_name_prefix": "rbd_data.10ae2ae8944a", + "name": "img1", + "pool_name": "rbd", + "features": 61, + "primary": true, + "features_name": ["deep-flatten", "exclusive-lock", "fast-diff", "layering", + "object-map"] + } + """ + schema = JObj(sub_elems={ + 'size': JLeaf(int), + 'obj_size': JLeaf(int), + 'num_objs': JLeaf(int), + 'order': JLeaf(int), + 'block_name_prefix': JLeaf(str), + 'name': JLeaf(str), + 'id': JLeaf(str), + 'unique_id': JLeaf(str), + 'image_format': JLeaf(int), + 'pool_name': JLeaf(str), + 'namespace': JLeaf(str, none=True), + 'primary': JLeaf(bool, none=True), + 'features': JLeaf(int), + 'features_name': JList(JLeaf(str)), + 'stripe_count': JLeaf(int, none=True), + 'stripe_unit': JLeaf(int, none=True), + 'parent': JObj(sub_elems={'pool_name': JLeaf(str), + 'pool_namespace': JLeaf(str, none=True), + 'image_name': JLeaf(str), + 'snap_name': JLeaf(str)}, none=True), + 'data_pool': JLeaf(str, none=True), + 'snapshots': JList(JLeaf(dict)), + 'timestamp': JLeaf(str, none=True), + 'disk_usage': JLeaf(int, none=True), + 'total_disk_usage': JLeaf(int, none=True), + 'configuration': JList(JObj(sub_elems={ + 'name': JLeaf(str), + 'source': JLeaf(int), + 'value': JLeaf(str), + })), + 'metadata': JObj({}, allow_unknown=True), + 'mirror_mode': JLeaf(str), + }) + self.assertSchema(img, schema) + + for k, v in kwargs.items(): + if isinstance(v, list): + self.assertSetEqual(set(img[k]), set(v)) + else: + self.assertEqual(img[k], v) + + def _validate_snapshot(self, snap, **kwargs): + self.assertIn('id', snap) + self.assertIn('name', snap) + self.assertIn('is_protected', snap) + self.assertIn('timestamp', snap) + self.assertIn('size', snap) + self.assertIn('children', snap) + + for k, v in kwargs.items(): + if isinstance(v, list): + self.assertSetEqual(set(snap[k]), set(v)) + else: + self.assertEqual(snap[k], v) + + def _validate_snapshot_list(self, snap_list, snap_name=None, **kwargs): + found = False + for snap in snap_list: + self.assertIn('name', snap) + if snap_name and snap['name'] == snap_name: + found = True + self._validate_snapshot(snap, **kwargs) + break + if snap_name and not found: + self.fail("Snapshot {} not found".format(snap_name)) + + def test_list(self): + data = self._get('/api/block/image?offset=0&limit=-1&search=&sort=+name', + version=RbdTest.LIST_VERSION) + self.assertStatus(200) + self.assertEqual(len(data), 2) + + for pool_view in data: + self.assertIsNotNone(pool_view['value']) + self.assertIn('pool_name', pool_view) + self.assertIn(pool_view['pool_name'], ['rbd', 'rbd_iscsi']) + image_list = pool_view['value'] + self.assertEqual(len(image_list), 2) + + for img in image_list: + self.assertIn('name', img) + self.assertIn('pool_name', img) + 
self.assertIn(img['pool_name'], ['rbd', 'rbd_iscsi']) + if img['name'] == 'img1': + self._validate_image(img, size=1073741824, + num_objs=256, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', + 'layering', + 'object-map']) + elif img['name'] == 'img2': + self._validate_image(img, size=2147483648, + num_objs=512, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', + 'layering', + 'object-map']) + else: + assert False, "Unexpected image '{}' in result list".format(img['name']) + + def test_create(self): + rbd_name = 'test_rbd' + self.create_image('rbd', None, rbd_name, 10240) + self.assertStatus(201) + + img = self.get_image('rbd', None, 'test_rbd') + self.assertStatus(200) + + self._validate_image(img, name=rbd_name, size=10240, + num_objs=1, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', None, rbd_name) + + def test_create_with_configuration(self): + pool = 'rbd' + image_name = 'image_with_config' + size = 10240 + configuration = { + 'rbd_qos_bps_limit': 10240, + 'rbd_qos_bps_burst': 10240 * 2, + } + expected = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': str(10240), + }, { + 'name': 'rbd_qos_bps_burst', + 'source': 2, + 'value': str(10240 * 2), + }] + + self.create_image(pool, None, image_name, size, configuration=configuration) + self.assertStatus(201) + img = self.get_image('rbd', None, image_name) + self.assertStatus(200) + for conf in expected: + self.assertIn(conf, img['configuration']) + + self.remove_image(pool, None, image_name) + + def test_create_with_metadata(self): + pool = 'rbd' + image_name = 'image_with_meta' + size = 10240 + metadata = { + 'test1': 'test', + 'test2': 'value', + } + + self.create_image(pool, None, image_name, size, metadata=metadata) + self.assertStatus(201) + img = self.get_image('rbd', None, image_name) + self.assertStatus(200) + self.assertEqual(len(metadata), len(img['metadata'])) + for meta in metadata: + self.assertIn(meta, img['metadata']) + + self.remove_image(pool, None, image_name) + + def test_create_rbd_in_data_pool(self): + if not self.bluestore_support: + self.skipTest('requires bluestore cluster') + + self.create_pool('data_pool', 2**4, 'erasure') + + rbd_name = 'test_rbd_in_data_pool' + self.create_image('rbd', None, rbd_name, 10240, data_pool='data_pool') + self.assertStatus(201) + + img = self.get_image('rbd', None, 'test_rbd_in_data_pool') + self.assertStatus(200) + + self._validate_image(img, name=rbd_name, size=10240, + num_objs=1, obj_size=4194304, + data_pool='data_pool', + features_name=['data-pool', 'deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', None, rbd_name) + self.assertStatus(204) + self._ceph_cmd(['osd', 'pool', 'delete', 'data_pool', 'data_pool', + '--yes-i-really-really-mean-it']) + + def test_create_rbd_twice(self): + res = self.create_image('rbd', None, 'test_rbd_twice', 10240) + + res = self.create_image('rbd', None, 'test_rbd_twice', 10240) + self.assertStatus(400) + self.assertEqual(res, {"code": '17', 'status': 400, "component": "rbd", + "detail": "[errno 17] RBD image already exists (error creating " + "image)", + 'task': {'name': 'rbd/create', + 'metadata': {'pool_name': 'rbd', 'namespace': None, + 'image_name': 'test_rbd_twice'}}}) + self.remove_image('rbd', None, 'test_rbd_twice') + self.assertStatus(204) + + def test_snapshots_and_clone_info(self): + 
self.create_snapshot('rbd', None, 'img1', 'snap1', False) + self.create_snapshot('rbd', None, 'img1', 'snap2', False) + self._rbd_cmd(['snap', 'protect', 'rbd/img1@snap1']) + self._rbd_cmd(['clone', 'rbd/img1@snap1', 'rbd_iscsi/img1_clone']) + + img = self.get_image('rbd', None, 'img1') + self.assertStatus(200) + self._validate_image(img, name='img1', size=1073741824, + num_objs=256, obj_size=4194304, parent=None, + features_name=['deep-flatten', 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + for snap in img['snapshots']: + if snap['name'] == 'snap1': + self._validate_snapshot(snap, is_protected=True) + self.assertEqual(len(snap['children']), 1) + self.assertDictEqual(snap['children'][0], + {'pool_name': 'rbd_iscsi', + 'image_name': 'img1_clone'}) + elif snap['name'] == 'snap2': + self._validate_snapshot(snap, is_protected=False) + + img = self.get_image('rbd_iscsi', None, 'img1_clone') + self.assertStatus(200) + self._validate_image(img, name='img1_clone', size=1073741824, + num_objs=256, obj_size=4194304, + parent={'pool_name': 'rbd', 'pool_namespace': '', + 'image_name': 'img1', 'snap_name': 'snap1'}, + features_name=['deep-flatten', 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + self.remove_image('rbd_iscsi', None, 'img1_clone') + self.assertStatus(204) + + def test_disk_usage(self): + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '50M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap1', False) + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '20M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap2', False) + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '10M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap3', False) + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', 'rbd/img2']) + img = self.get_image('rbd', None, 'img2') + self.assertStatus(200) + self._validate_image(img, name='img2', size=2147483648, + total_disk_usage=268435456, disk_usage=67108864) + + def test_delete_non_existent_image(self): + res = self.remove_image('rbd', None, 'i_dont_exist') + self.assertStatus(404) + self.assertEqual(res, {u'code': 404, "status": 404, "component": None, + "detail": "(404, 'Image not found')", + 'task': {'name': 'rbd/delete', + 'metadata': {'image_spec': 'rbd/i_dont_exist'}}}) + + def test_image_delete(self): + self.create_image('rbd', None, 'delete_me', 2**30) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap1', False) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap2', False) + self.assertStatus(201) + + img = self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 2) + + self.remove_snapshot('rbd', None, 'delete_me', 'snap1') + self.assertStatus(204) + self.remove_snapshot('rbd', None, 'delete_me', 'snap2') + self.assertStatus(204) + + img = self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 0) + + self.remove_image('rbd', None, 'delete_me') + self.assertStatus(204) + + def test_image_delete_with_snapshot(self): + self.create_image('rbd', None, 'delete_me', 2**30) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap1', False) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap2', False) + self.assertStatus(201) + + img 
= self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 2) + + self.remove_image('rbd', None, 'delete_me') + self.assertStatus(204) + + def test_image_rename(self): + self.create_image('rbd', None, 'edit_img', 2**30) + self.assertStatus(201) + self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self.edit_image('rbd', None, 'edit_img', 'new_edit_img') + self.assertStatus(200) + self.get_image('rbd', None, 'edit_img') + self.assertStatus(404) + self.get_image('rbd', None, 'new_edit_img') + self.assertStatus(200) + self.remove_image('rbd', None, 'new_edit_img') + self.assertStatus(204) + + def test_image_resize(self): + self.create_image('rbd', None, 'edit_img', 2**30) + self.assertStatus(201) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, size=2**30) + self.edit_image('rbd', None, 'edit_img', size=2*2**30) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, size=2*2**30) + self.remove_image('rbd', None, 'edit_img') + self.assertStatus(204) + + def test_image_change_features(self): + self.create_image('rbd', None, 'edit_img', 2**30, features=["layering"]) + self.assertStatus(201) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=["layering"]) + self.edit_image('rbd', None, 'edit_img', + features=["fast-diff", "object-map", "exclusive-lock"]) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + self.edit_image('rbd', None, 'edit_img', + features=["journaling", "exclusive-lock"]) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'journaling', 'layering']) + self.remove_image('rbd', None, 'edit_img') + self.assertStatus(204) + + def test_image_change_config(self): + pool = 'rbd' + image = 'image_with_config' + initial_conf = { + 'rbd_qos_bps_limit': 10240, + 'rbd_qos_write_iops_limit': None + } + initial_expect = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': '10240', + }, { + 'name': 'rbd_qos_write_iops_limit', + 'source': 0, + 'value': '0', + }] + new_conf = { + 'rbd_qos_bps_limit': 0, + 'rbd_qos_bps_burst': 20480, + 'rbd_qos_write_iops_limit': None + } + new_expect = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': '0', + }, { + 'name': 'rbd_qos_bps_burst', + 'source': 2, + 'value': '20480', + }, { + 'name': 'rbd_qos_write_iops_limit', + 'source': 0, + 'value': '0', + }] + + self.create_image(pool, None, image, 2**30, configuration=initial_conf) + self.assertStatus(201) + img = self.get_image(pool, None, image) + self.assertStatus(200) + for conf in initial_expect: + self.assertIn(conf, img['configuration']) + + self.edit_image(pool, None, image, configuration=new_conf) + img = self.get_image(pool, None, image) + self.assertStatus(200) + for conf in new_expect: + self.assertIn(conf, img['configuration']) + + self.remove_image(pool, None, image) + self.assertStatus(204) + + def test_image_change_meta(self): + pool = 'rbd' + image = 'image_with_meta' + initial_meta = { + 'test1': 'test', + 'test2': 'value', + 'test3': None, + } + initial_expect = { + 'test1': 'test', + 'test2': 'value', + } + 
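+ # Note: judging by the expected values in this test, metadata keys set to None are + # dropped when editing - 'test3' is absent from initial_expect above, and 'test1' and + # 'test4' will be absent from new_expect below.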
new_meta = { + 'test1': None, + 'test2': 'new_value', + 'test3': 'value', + 'test4': None, + } + new_expect = { + 'test2': 'new_value', + 'test3': 'value', + } + + self.create_image(pool, None, image, 2**30, metadata=initial_meta) + self.assertStatus(201) + img = self.get_image(pool, None, image) + self.assertStatus(200) + self.assertEqual(len(initial_expect), len(img['metadata'])) + for meta in initial_expect: + self.assertIn(meta, img['metadata']) + + self.edit_image(pool, None, image, metadata=new_meta) + img = self.get_image(pool, None, image) + self.assertStatus(200) + self.assertEqual(len(new_expect), len(img['metadata'])) + for meta in new_expect: + self.assertIn(meta, img['metadata']) + + self.remove_image(pool, None, image) + self.assertStatus(204) + + def test_update_snapshot(self): + self.create_snapshot('rbd', None, 'img1', 'snap5', False) + self.assertStatus(201) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False) + + self.update_snapshot('rbd', None, 'img1', 'snap5', 'snap6', None) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=False) + + self.update_snapshot('rbd', None, 'img1', 'snap6', None, True) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=True) + + self.update_snapshot('rbd', None, 'img1', 'snap6', 'snap5', False) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False) + + self.remove_snapshot('rbd', None, 'img1', 'snap5') + self.assertStatus(204) + + def test_snapshot_rollback(self): + self.create_image('rbd', None, 'rollback_img', 2**30, + features=["layering", "exclusive-lock", "fast-diff", + "object-map"]) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'rollback_img', 'snap1', False) + self.assertStatus(201) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertEqual(img['disk_usage'], 0) + + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', + 'rbd/rollback_img']) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertGreater(img['disk_usage'], 0) + + self.rollback_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus([201, 200]) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertEqual(img['disk_usage'], 0) + + self.remove_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus(204) + self.remove_image('rbd', None, 'rollback_img') + self.assertStatus(204) + + def test_clone(self): + self.create_image('rbd', None, 'cimg', 2**30, features=["layering"], + metadata={'key1': 'val1'}) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'cimg', 'snap1', False) + self.assertStatus(201) + self.update_snapshot('rbd', None, 'cimg', 'snap1', None, True) + self.assertStatus(200) + self.clone_image('rbd', None, 'cimg', 'snap1', 'rbd', None, 'cimg-clone', + features=["layering", "exclusive-lock", "fast-diff", + "object-map"], + metadata={'key1': None, 'key2': 'val2'}) + self.assertStatus([200, 201]) + + img = self.get_image('rbd', None, 'cimg-clone') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map'], + parent={'pool_name': 'rbd', 'pool_namespace': '', + 'image_name': 'cimg', 
'snap_name': 'snap1'}, + metadata={'key2': 'val2'}) + + res = self.remove_image('rbd', None, 'cimg') + self.assertStatus(400) + self.assertIn('code', res) + self.assertEqual(res['code'], '16') + + self.remove_image('rbd', None, 'cimg-clone') + self.assertStatus(204) + self.remove_image('rbd', None, 'cimg') + self.assertStatus(204) + + def test_copy(self): + self.create_image('rbd', None, 'coimg', 2**30, + features=["layering", "exclusive-lock", "fast-diff", + "object-map"], + metadata={'key1': 'val1'}) + self.assertStatus(201) + + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', + 'rbd/coimg']) + + self.copy_image('rbd', None, 'coimg', 'rbd_iscsi', None, 'coimg-copy', + features=["layering", "fast-diff", "exclusive-lock", + "object-map"], + metadata={'key1': None, 'key2': 'val2'}) + self.assertStatus([200, 201]) + + img = self.get_image('rbd', None, 'coimg') + self.assertStatus(200) + self._validate_image(img, features_name=['layering', 'exclusive-lock', + 'fast-diff', 'object-map'], + metadata={'key1': 'val1'}) + + img_copy = self.get_image('rbd_iscsi', None, 'coimg-copy') + self._validate_image(img_copy, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map'], + metadata={'key2': 'val2'}, + disk_usage=img['disk_usage']) + + self.remove_image('rbd', None, 'coimg') + self.assertStatus(204) + self.remove_image('rbd_iscsi', None, 'coimg-copy') + self.assertStatus(204) + + def test_flatten(self): + self.create_snapshot('rbd', None, 'img1', 'snapf', False) + self.update_snapshot('rbd', None, 'img1', 'snapf', None, True) + self.clone_image('rbd', None, 'img1', 'snapf', 'rbd_iscsi', None, 'img1_snapf_clone') + + img = self.get_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(200) + self.assertIsNotNone(img['parent']) + + self.flatten_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus([200, 201]) + + img = self.get_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(200) + self.assertIsNone(img['parent']) + + self.update_snapshot('rbd', None, 'img1', 'snapf', None, False) + self.remove_snapshot('rbd', None, 'img1', 'snapf') + self.assertStatus(204) + + self.remove_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(204) + + def test_default_features(self): + default_features = self._get('/api/block/image/default_features') + self.assertEqual(default_features, [ + 'deep-flatten', 'exclusive-lock', 'fast-diff', 'layering', 'object-map']) + + def test_clone_format_version(self): + config_name = 'rbd_default_clone_format' + + def _get_config_by_name(conf_name): + data = self._get('/api/cluster_conf/{}'.format(conf_name)) + if 'value' in data: + return data['value'] + return None + + # with rbd_default_clone_format = auto + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 1) + self.assertStatus(200) + + # with rbd_default_clone_format = 1 + value = [{'section': "global", 'value': "1"}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + value, + timeout=60) + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 1) + self.assertStatus(200) + + # with rbd_default_clone_format = 2 + value = [{'section': "global", 'value': "2"}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + value, + 
timeout=60) + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 2) + self.assertStatus(200) + + value = [] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + None, + timeout=60) + + def test_image_with_namespace(self): + self.create_namespace('rbd', 'ns') + self.create_image('rbd', 'ns', 'test', 10240) + self.assertStatus(201) + + img = self.get_image('rbd', 'ns', 'test') + self.assertStatus(200) + + self._validate_image(img, name='test', size=10240, + pool_name='rbd', namespace='ns', + num_objs=1, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', 'ns', 'test') + self.remove_namespace('rbd', 'ns') + + def test_move_image_to_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd') + + self.get_image('rbd', None, 'test_rbd') + self.assertStatus(404) + + time.sleep(1) + + image = self.get_trash('rbd', img_id) + self.assertIsNotNone(image) + + self.remove_trash('rbd', img_id) + + def test_list_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 0) + data = self._get('/api/block/image/trash/?pool_name={}'.format('rbd')) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertIsNotNone(data) + + self.remove_trash('rbd', img_id) + self.assertStatus(204) + + def test_restore_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd') + + self.restore_trash('rbd', None, img_id, 'test_rbd') + + self.get_image('rbd', None, 'test_rbd') + self.assertStatus(200) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + self.remove_image('rbd', None, 'test_rbd') + + def test_remove_expired_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 0) + self.remove_trash('rbd', img_id, False) + self.assertStatus(204) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + def test_remove_not_expired_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 9999) + self.remove_trash('rbd', img_id, False) + self.assertStatus(400) + + time.sleep(1) + + image = self.get_trash('rbd', img_id) + self.assertIsNotNone(image) + + self.remove_trash('rbd', img_id, True) + + def test_remove_not_expired_trash_with_force(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 9999) + self.remove_trash('rbd', img_id, True) + self.assertStatus(204) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + def test_purge_trash(self): + id_expired = self.create_image_in_trash('rbd', 'test_rbd_expired', 0) + id_not_expired = self.create_image_in_trash('rbd', 'test_rbd', 9999) + + time.sleep(1) + + self.purge_trash('rbd') + self.assertStatus([200, 201]) + + time.sleep(1) + + trash_not_expired = self.get_trash('rbd', id_not_expired) + self.assertIsNotNone(trash_not_expired) + + self.wait_until_equal(lambda: self.get_trash('rbd', id_expired), None, 60) + + def test_list_namespaces(self): + self.create_namespace('rbd', 'ns') + + namespaces = self._get('/api/block/pool/rbd/namespace') + self.assertStatus(200) + self.assertEqual(len(namespaces), 1) + + self.remove_namespace('rbd', 'ns') diff --git a/qa/tasks/mgr/dashboard/test_rbd_mirroring.py b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py new file mode 100644 index 000000000..b6a86e405 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 
-*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class RbdMirroringTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager', 'block-manager'] + + @classmethod + def get_pool(cls, pool): + data = cls._get('/api/block/mirroring/pool/{}'.format(pool)) + if isinstance(data, dict): + return data + return {} + + @classmethod + def update_pool(cls, pool, mirror_mode): + data = {'mirror_mode': mirror_mode} + return cls._task_put('/api/block/mirroring/pool/{}'.format(pool), + data) + + @classmethod + def list_peers(cls, pool): + data = cls._get('/api/block/mirroring/pool/{}/peer'.format(pool)) + if isinstance(data, list): + return data + return [] + + @classmethod + def get_peer(cls, pool, peer_uuid): + data = cls._get('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid)) + if isinstance(data, dict): + return data + return {} + + @classmethod + def create_peer(cls, pool, cluster_name, client_id, **kwargs): + data = {'cluster_name': cluster_name, 'client_id': client_id} + data.update(kwargs) + return cls._task_post('/api/block/mirroring/pool/{}/peer'.format(pool), + data) + + @classmethod + def update_peer(cls, pool, peer_uuid, **kwargs): + return cls._task_put('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid), + kwargs) + + @classmethod + def delete_peer(cls, pool, peer_uuid): + return cls._task_delete('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid)) + + @classmethod + def setUpClass(cls): + super(RbdMirroringTest, cls).setUpClass() + cls.create_pool('rbd', 2**3, 'replicated') + + @classmethod + def tearDownClass(cls): + super(RbdMirroringTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it']) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self.get_pool('rbd') + self.assertStatus(403) + self.list_peers('rbd') + self.assertStatus(403) + self.get_peer('rbd', '123') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self.create_peer('rbd', 'remote', 'id') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'delete']}]) + def test_update_access_permissions(self): + self.update_peer('rbd', '123') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self.delete_peer('rbd', '123') + self.assertStatus(403) + + def test_mirror_mode(self): + self.update_pool('rbd', 'disabled') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'disabled') + + self.update_pool('rbd', 'image') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'image') + + self.update_pool('rbd', 'pool') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'pool') + + self.update_pool('rbd', 'disabled') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'disabled') + + def test_set_invalid_mirror_mode(self): + self.update_pool('rbd', 'invalid') + self.assertStatus(400) + + def test_set_same_mirror_mode(self): + self.update_pool('rbd', 'disabled') + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_peer(self): + self.update_pool('rbd', 'image') + self.assertStatus(200) + + peers 
= self.list_peers('rbd') + self.assertStatus(200) + self.assertEqual([], peers) + + uuid = self.create_peer('rbd', 'remote', 'admin')['uuid'] + self.assertStatus(201) + + peers = self.list_peers('rbd') + self.assertStatus(200) + self.assertEqual([uuid], peers) + + expected_peer = { + 'uuid': uuid, + 'cluster_name': 'remote', + 'site_name': 'remote', + 'client_id': 'admin', + 'mon_host': '', + 'key': '', + 'direction': 'rx-tx', + 'mirror_uuid': '' + } + peer = self.get_peer('rbd', uuid) + self.assertEqual(expected_peer, peer) + + self.update_peer('rbd', uuid, mon_host='1.2.3.4') + self.assertStatus(200) + + expected_peer['mon_host'] = '1.2.3.4' + peer = self.get_peer('rbd', uuid) + self.assertEqual(expected_peer, peer) + + self.delete_peer('rbd', uuid) + self.assertStatus(204) + + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_disable_mirror_with_peers(self): + self.update_pool('rbd', 'image') + self.assertStatus(200) + + uuid = self.create_peer('rbd', 'remote', 'admin')['uuid'] + self.assertStatus(201) + + self.update_pool('rbd', 'disabled') + self.assertStatus(400) + + self.delete_peer('rbd', uuid) + self.assertStatus(204) + + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_site_name(self): + expected_site_name = {'site_name': 'site-a'} + self._task_put('/api/block/mirroring/site_name', expected_site_name) + self.assertStatus(200) + + site_name = self._get('/api/block/mirroring/site_name') + self.assertStatus(200) + self.assertEqual(expected_site_name, site_name) + + def test_bootstrap(self): + self.update_pool('rbd', 'image') + token_data = self._task_post('/api/block/mirroring/pool/rbd/bootstrap/token', {}) + self.assertStatus(200) + + import_data = { + 'token': token_data['token'], + 'direction': 'invalid'} + self._task_post('/api/block/mirroring/pool/rbd/bootstrap/peer', import_data) + self.assertStatus(400) + + # cannot import "yourself" as a peer + import_data['direction'] = 'rx' + self._task_post('/api/block/mirroring/pool/rbd/bootstrap/peer', import_data) + self.assertStatus(400) diff --git a/qa/tasks/mgr/dashboard/test_requests.py b/qa/tasks/mgr/dashboard/test_requests.py new file mode 100644 index 000000000..834ba174a --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_requests.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from . 
import DEFAULT_API_VERSION +from .helper import DashboardTestCase + + +class RequestsTest(DashboardTestCase): + def test_gzip(self): + self._get('/api/summary') + self.assertHeaders({ + 'Content-Encoding': 'gzip', + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION) + }) + + def test_force_no_gzip(self): + self._get('/api/summary', headers={'Accept-Encoding': 'identity'}) + self.assertNotIn('Content-Encoding', self._resp.headers) + self.assertHeaders({ + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION) + }) + + def test_server(self): + self._get('/api/summary') + self.assertHeaders({ + 'server': 'Ceph-Dashboard', + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION), + 'Content-Security-Policy': "frame-ancestors 'self';", + 'X-Content-Type-Options': 'nosniff', + 'Strict-Transport-Security': 'max-age=63072000; includeSubDomains; preload' + }) diff --git a/qa/tasks/mgr/dashboard/test_rgw.py b/qa/tasks/mgr/dashboard/test_rgw.py new file mode 100644 index 000000000..01dbae59f --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rgw.py @@ -0,0 +1,868 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import base64 +import logging +import time +from urllib import parse + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.hashes import SHA1 +from cryptography.hazmat.primitives.twofactor.totp import TOTP + +from .helper import DashboardTestCase, JLeaf, JList, JObj + +logger = logging.getLogger(__name__) + + +class RgwTestCase(DashboardTestCase): + + maxDiff = None + create_test_user = False + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + super(RgwTestCase, cls).setUpClass() + # Create the administrator account. + cls._radosgw_admin_cmd([ + 'user', 'create', '--uid', 'admin', '--display-name', 'admin', + '--system', '--access-key', 'admin', '--secret', 'admin' + ]) + # Update the dashboard configuration. + cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin') + cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin') + # Create a test user? + if cls.create_test_user: + cls._radosgw_admin_cmd([ + 'user', 'create', '--uid', 'teuth-test-user', '--display-name', + 'teuth-test-user' + ]) + cls._radosgw_admin_cmd([ + 'caps', 'add', '--uid', 'teuth-test-user', '--caps', + 'metadata=write' + ]) + cls._radosgw_admin_cmd([ + 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser', + 'teuth-test-subuser', '--access', 'full', '--key-type', 's3', + '--access-key', 'xyz123' + ]) + cls._radosgw_admin_cmd([ + 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser', + 'teuth-test-subuser2', '--access', 'full', '--key-type', + 'swift' + ]) + + @classmethod + def tearDownClass(cls): + # Delete administrator account. 
+ cls._radosgw_admin_cmd(['user', 'rm', '--uid', 'admin']) + if cls.create_test_user: + cls._radosgw_admin_cmd(['user', 'rm', '--uid=teuth-test-user', '--purge-data']) + super(RgwTestCase, cls).tearDownClass() + + def get_rgw_user(self, uid, stats=True): + return self._get('/api/rgw/user/{}?stats={}'.format(uid, stats)) + + +class RgwApiCredentialsTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + def test_invalid_credentials(self): + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'invalid') + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'invalid') + resp = self._get('/api/rgw/user') + self.assertStatus(404) + self.assertIn('detail', resp) + self.assertIn('component', resp) + self.assertIn('Error connecting to Object Gateway', resp['detail']) + self.assertEqual(resp['component'], 'rgw') + + def test_success(self): + # Set the default credentials. + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin') + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin') + data = self._get('/ui-api/rgw/status') + self.assertStatus(200) + self.assertIn('available', data) + self.assertIn('message', data) + self.assertTrue(data['available']) + + +class RgwSiteTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + def test_get_placement_targets(self): + data = self._get('/api/rgw/site?query=placement-targets') + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'zonegroup': str, + 'placement_targets': JList(JObj({ + 'name': str, + 'data_pool': str + })) + })) + + def test_get_realms(self): + data = self._get('/api/rgw/site?query=realms') + self.assertStatus(200) + self.assertSchema(data, JList(str)) + + +class RgwBucketTest(RgwTestCase): + + _mfa_token_serial = '1' + _mfa_token_seed = '23456723' + _mfa_token_time_step = 2 + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwBucketTest, cls).setUpClass() + # Create MFA TOTP token for test user. + cls._radosgw_admin_cmd([ + 'mfa', 'create', '--uid', 'teuth-test-user', '--totp-serial', cls._mfa_token_serial, + '--totp-seed', cls._mfa_token_seed, '--totp-seed-type', 'base32', + '--totp-seconds', str(cls._mfa_token_time_step), '--totp-window', '1' + ]) + # Create tenanted users. + cls._radosgw_admin_cmd([ + 'user', 'create', '--tenant', 'testx', '--uid', 'teuth-test-user', + '--display-name', 'tenanted teuth-test-user' + ]) + cls._radosgw_admin_cmd([ + 'user', 'create', '--tenant', 'testx2', '--uid', 'teuth-test-user2', + '--display-name', 'tenanted teuth-test-user 2' + ]) + + @classmethod + def tearDownClass(cls): + cls._radosgw_admin_cmd( + ['user', 'rm', '--tenant', 'testx', '--uid=teuth-test-user', '--purge-data']) + cls._radosgw_admin_cmd( + ['user', 'rm', '--tenant', 'testx2', '--uid=teuth-test-user2', '--purge-data']) + super(RgwBucketTest, cls).tearDownClass() + + def _get_mfa_token_pin(self): + totp_key = base64.b32decode(self._mfa_token_seed) + totp = TOTP(totp_key, 6, SHA1(), self._mfa_token_time_step, backend=default_backend(), + enforce_key_length=False) + time_value = int(time.time()) + return totp.generate(time_value) + + def test_all(self): + # Create a new bucket. 
+ self._post( + '/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'admin', + 'zonegroup': 'default', + 'placement_target': 'default-placement' + }) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'bucket_info': JObj(sub_elems={ + 'bucket': JObj(allow_unknown=True, sub_elems={ + 'name': JLeaf(str), + 'bucket_id': JLeaf(str), + 'tenant': JLeaf(str) + }), + 'quota': JObj(sub_elems={}, allow_unknown=True), + 'creation_time': JLeaf(str) + }, allow_unknown=True) + }, allow_unknown=True)) + data = data['bucket_info']['bucket'] + self.assertEqual(data['name'], 'teuth-test-bucket') + self.assertEqual(data['tenant'], '') + + # List all buckets. + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertIn('teuth-test-bucket', data) + + # List all buckets with stats. + data = self._get('/api/rgw/bucket?stats=true', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertSchema(data[0], JObj(sub_elems={ + 'bid': JLeaf(str), + 'bucket': JLeaf(str), + 'bucket_quota': JObj(sub_elems={}, allow_unknown=True), + 'id': JLeaf(str), + 'owner': JLeaf(str), + 'usage': JObj(sub_elems={}, allow_unknown=True), + 'tenant': JLeaf(str), + }, allow_unknown=True)) + + # List all buckets names without stats. + data = self._get('/api/rgw/bucket?stats=false', version='1.1') + self.assertStatus(200) + self.assertEqual(data, ['teuth-test-bucket']) + + # Get the bucket. + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + 'id': JLeaf(str), + 'bid': JLeaf(str), + 'tenant': JLeaf(str), + 'bucket': JLeaf(str), + 'bucket_quota': JObj(sub_elems={}, allow_unknown=True), + 'owner': JLeaf(str), + 'mfa_delete': JLeaf(str), + 'usage': JObj(sub_elems={}, allow_unknown=True), + 'versioning': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(data['bucket'], 'teuth-test-bucket') + self.assertEqual(data['owner'], 'admin') + self.assertEqual(data['placement_rule'], 'default-placement') + self.assertEqual(data['versioning'], 'Suspended') + + # Update bucket: change owner, enable versioning. + self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Enabled' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + 'owner': JLeaf(str), + 'bid': JLeaf(str), + 'tenant': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(data['owner'], 'teuth-test-user') + self.assertEqual(data['versioning'], 'Enabled') + + # Update bucket: enable MFA Delete. + self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Enabled', + 'mfa_delete': 'Enabled', + 'mfa_token_serial': self._mfa_token_serial, + 'mfa_token_pin': self._get_mfa_token_pin() + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertEqual(data['versioning'], 'Enabled') + self.assertEqual(data['mfa_delete'], 'Enabled') + + # Update bucket: disable versioning & MFA Delete. + time.sleep(self._mfa_token_time_step * 3) # Required to get new TOTP pin. 
+ self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Suspended', + 'mfa_delete': 'Disabled', + 'mfa_token_serial': self._mfa_token_serial, + 'mfa_token_pin': self._get_mfa_token_pin() + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertEqual(data['versioning'], 'Suspended') + self.assertEqual(data['mfa_delete'], 'Disabled') + + # Delete the bucket. + self._delete('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(204) + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 0) + + def test_crud_w_tenant(self): + # Create a new bucket. The tenant of the user is used when + # the bucket is created. + self._post( + '/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'testx$teuth-test-user', + 'zonegroup': 'default', + 'placement_target': 'default-placement' + }) + self.assertStatus(201) + # It's not possible to validate the result because there + # IS NO result object returned by the RGW Admin OPS API + # when a tenanted bucket is created. + data = self.jsonBody() + self.assertIsNone(data) + + # List all buckets. + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertIn('testx/teuth-test-bucket', data) + + def _verify_tenant_bucket(bucket, tenant, uid): + full_bucket_name = '{}/{}'.format(tenant, bucket) + _data = self._get('/api/rgw/bucket/{}'.format( + parse.quote_plus(full_bucket_name))) + self.assertStatus(200) + self.assertSchema(_data, JObj(sub_elems={ + 'owner': JLeaf(str), + 'bucket': JLeaf(str), + 'tenant': JLeaf(str), + 'bid': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(_data['owner'], '{}${}'.format(tenant, uid)) + self.assertEqual(_data['bucket'], bucket) + self.assertEqual(_data['tenant'], tenant) + self.assertEqual(_data['bid'], full_bucket_name) + return _data + + # Get the bucket. + data = _verify_tenant_bucket('teuth-test-bucket', 'testx', 'teuth-test-user') + self.assertEqual(data['placement_rule'], 'default-placement') + self.assertEqual(data['versioning'], 'Suspended') + + # Update bucket: different user with different tenant, enable versioning. + self._put( + '/api/rgw/bucket/{}'.format( + parse.quote_plus('testx/teuth-test-bucket')), + params={ + 'bucket_id': data['id'], + 'uid': 'testx2$teuth-test-user2', + 'versioning_state': 'Enabled' + }) + data = _verify_tenant_bucket('teuth-test-bucket', 'testx2', 'teuth-test-user2') + self.assertEqual(data['versioning'], 'Enabled') + + # Change owner to a non-tenanted user + self._put( + '/api/rgw/bucket/{}'.format( + parse.quote_plus('testx2/teuth-test-bucket')), + params={ + 'bucket_id': data['id'], + 'uid': 'admin' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertIn('owner', data) + self.assertEqual(data['owner'], 'admin') + self.assertEqual(data['tenant'], '') + self.assertEqual(data['bucket'], 'teuth-test-bucket') + self.assertEqual(data['bid'], 'teuth-test-bucket') + self.assertEqual(data['versioning'], 'Enabled') + + # Change owner back to tenanted user, suspend versioning. 
+ self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'testx$teuth-test-user', + 'versioning_state': 'Suspended' + }) + self.assertStatus(200) + data = _verify_tenant_bucket('teuth-test-bucket', 'testx', 'teuth-test-user') + self.assertEqual(data['versioning'], 'Suspended') + + # Delete the bucket. + self._delete('/api/rgw/bucket/{}'.format( + parse.quote_plus('testx/teuth-test-bucket'))) + self.assertStatus(204) + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 0) + + def test_crud_w_locking(self): + # Create + self._post('/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'teuth-test-user', + 'zonegroup': 'default', + 'placement_target': 'default-placement', + 'lock_enabled': 'true', + 'lock_mode': 'GOVERNANCE', + 'lock_retention_period_days': '0', + 'lock_retention_period_years': '1' + }) + self.assertStatus(201) + # Read + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema( + data, + JObj(sub_elems={ + 'lock_enabled': JLeaf(bool), + 'lock_mode': JLeaf(str), + 'lock_retention_period_days': JLeaf(int), + 'lock_retention_period_years': JLeaf(int) + }, + allow_unknown=True)) + self.assertTrue(data['lock_enabled']) + self.assertEqual(data['lock_mode'], 'GOVERNANCE') + self.assertEqual(data['lock_retention_period_days'], 0) + self.assertEqual(data['lock_retention_period_years'], 1) + # Update + self._put('/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'lock_mode': 'COMPLIANCE', + 'lock_retention_period_days': '15', + 'lock_retention_period_years': '0' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertTrue(data['lock_enabled']) + self.assertEqual(data['lock_mode'], 'COMPLIANCE') + self.assertEqual(data['lock_retention_period_days'], 15) + self.assertEqual(data['lock_retention_period_years'], 0) + self.assertStatus(200) + + # Update: Disabling bucket versioning should fail if object locking enabled + self._put('/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Suspended' + }) + self.assertStatus(409) + + # Delete + self._delete('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(204) + + +class RgwDaemonTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @DashboardTestCase.RunAs('test', 'test', [{ + 'rgw': ['create', 'update', 'delete'] + }]) + def test_read_access_permissions(self): + self._get('/api/rgw/daemon') + self.assertStatus(403) + self._get('/api/rgw/daemon/id') + self.assertStatus(403) + + def test_list(self): + data = self._get('/api/rgw/daemon') + self.assertStatus(200) + self.assertEqual(len(data), 1) + data = data[0] + self.assertIn('id', data) + self.assertIn('version', data) + self.assertIn('server_hostname', data) + self.assertIn('zonegroup_name', data) + self.assertIn('zone_name', data) + self.assertIn('port', data) + + def test_get(self): + data = self._get('/api/rgw/daemon') + self.assertStatus(200) + + data = self._get('/api/rgw/daemon/{}'.format(data[0]['id'])) + self.assertStatus(200) + self.assertIn('rgw_metadata', data) + self.assertIn('rgw_id', data) + self.assertIn('rgw_status', data) + self.assertTrue(data['rgw_metadata']) + + def test_status(self): + data = self._get('/ui-api/rgw/status') + self.assertStatus(200) + self.assertIn('available', data) + self.assertIn('message', data) + 
self.assertTrue(data['available']) + + +class RgwUserTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + super(RgwUserTest, cls).setUpClass() + + def _assert_user_data(self, data): + self.assertSchema(data, JObj(sub_elems={ + 'caps': JList(JObj(sub_elems={}, allow_unknown=True)), + 'display_name': JLeaf(str), + 'email': JLeaf(str), + 'keys': JList(JObj(sub_elems={}, allow_unknown=True)), + 'max_buckets': JLeaf(int), + 'subusers': JList(JLeaf(str)), + 'suspended': JLeaf(int), + 'swift_keys': JList(JObj(sub_elems={}, allow_unknown=True)), + 'tenant': JLeaf(str), + 'user_id': JLeaf(str), + 'uid': JLeaf(str) + }, allow_unknown=True)) + self.assertGreaterEqual(len(data['keys']), 1) + + def test_get(self): + data = self.get_rgw_user('admin') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'admin') + self.assertTrue(data['stats']) + self.assertIsInstance(data['stats'], dict) + # Test without stats. + data = self.get_rgw_user('admin', False) + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'admin') + + def test_list(self): + data = self._get('/api/rgw/user') + self.assertStatus(200) + self.assertGreaterEqual(len(data), 1) + self.assertIn('admin', data) + + def test_get_emails(self): + data = self._get('/api/rgw/user/get_emails') + self.assertStatus(200) + self.assertSchema(data, JList(str)) + + def test_create_get_update_delete(self): + # Create a new user. + self._post('/api/rgw/user', params={ + 'uid': 'teuth-test-user', + 'display_name': 'display name' + }) + self.assertStatus(201) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['display_name'], 'display name') + + # Get the user. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['tenant'], '') + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['uid'], 'teuth-test-user') + + # Update the user. + self._put( + '/api/rgw/user/teuth-test-user', + params={'display_name': 'new name'}) + self.assertStatus(200) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['display_name'], 'new name') + + # Delete the user. + self._delete('/api/rgw/user/teuth-test-user') + self.assertStatus(204) + self.get_rgw_user('teuth-test-user') + self.assertStatus(500) + resp = self.jsonBody() + self.assertIn('detail', resp) + self.assertIn('failed request with status code 404', resp['detail']) + self.assertIn('"Code":"NoSuchUser"', resp['detail']) + self.assertIn('"HostId"', resp['detail']) + self.assertIn('"RequestId"', resp['detail']) + + def test_create_get_update_delete_w_tenant(self): + # Create a new user. + self._post( + '/api/rgw/user', + params={ + 'uid': 'test01$teuth-test-user', + 'display_name': 'display name' + }) + self.assertStatus(201) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['display_name'], 'display name') + + # Get the user. + data = self.get_rgw_user('test01$teuth-test-user') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['tenant'], 'test01') + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['uid'], 'test01$teuth-test-user') + + # Update the user. 
+ self._put( + '/api/rgw/user/test01$teuth-test-user', + params={'display_name': 'new name'}) + self.assertStatus(200) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['display_name'], 'new name') + + # Delete the user. + self._delete('/api/rgw/user/test01$teuth-test-user') + self.assertStatus(204) + self.get_rgw_user('test01$teuth-test-user') + self.assertStatus(500) + resp = self.jsonBody() + self.assertIn('detail', resp) + self.assertIn('failed request with status code 404', resp['detail']) + self.assertIn('"Code":"NoSuchUser"', resp['detail']) + self.assertIn('"HostId"', resp['detail']) + self.assertIn('"RequestId"', resp['detail']) + + +class RgwUserCapabilityTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserCapabilityTest, cls).setUpClass() + + def test_set(self): + self._post( + '/api/rgw/user/teuth-test-user/capability', + params={ + 'type': 'usage', + 'perm': 'read' + }) + self.assertStatus(201) + data = self.jsonBody() + self.assertEqual(len(data), 1) + data = data[0] + self.assertEqual(data['type'], 'usage') + self.assertEqual(data['perm'], 'read') + + # Get the user data to validate the capabilities. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self.assertGreaterEqual(len(data['caps']), 1) + self.assertEqual(data['caps'][0]['type'], 'usage') + self.assertEqual(data['caps'][0]['perm'], 'read') + + def test_delete(self): + self._delete( + '/api/rgw/user/teuth-test-user/capability', + params={ + 'type': 'metadata', + 'perm': 'write' + }) + self.assertStatus(204) + + # Get the user data to validate the capabilities. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self.assertEqual(len(data['caps']), 0) + + +class RgwUserKeyTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserKeyTest, cls).setUpClass() + + def test_create_s3(self): + self._post( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 's3', + 'generate_key': 'false', + 'access_key': 'abc987', + 'secret_key': 'aaabbbccc' + }) + data = self.jsonBody() + self.assertStatus(201) + self.assertGreaterEqual(len(data), 3) + key = self.find_object_in_list('access_key', 'abc987', data) + self.assertIsInstance(key, object) + self.assertEqual(key['secret_key'], 'aaabbbccc') + + def test_create_swift(self): + self._post( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 'swift', + 'subuser': 'teuth-test-subuser', + 'generate_key': 'false', + 'secret_key': 'xxxyyyzzz' + }) + data = self.jsonBody() + self.assertStatus(201) + self.assertGreaterEqual(len(data), 2) + key = self.find_object_in_list('secret_key', 'xxxyyyzzz', data) + self.assertIsInstance(key, object) + + def test_delete_s3(self): + self._delete( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 's3', + 'access_key': 'xyz123' + }) + self.assertStatus(204) + + def test_delete_swift(self): + self._delete( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 'swift', + 'subuser': 'teuth-test-user:teuth-test-subuser2' + }) + self.assertStatus(204) + + +class RgwUserQuotaTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserQuotaTest, cls).setUpClass() + + def _assert_quota(self, data): + self.assertIn('user_quota', data) + self.assertIn('max_objects', data['user_quota']) + self.assertIn('enabled', 
data['user_quota']) + self.assertIn('max_size_kb', data['user_quota']) + self.assertIn('max_size', data['user_quota']) + self.assertIn('bucket_quota', data) + self.assertIn('max_objects', data['bucket_quota']) + self.assertIn('enabled', data['bucket_quota']) + self.assertIn('max_size_kb', data['bucket_quota']) + self.assertIn('max_size', data['bucket_quota']) + + def test_get_quota(self): + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + + def test_set_user_quota(self): + self._put( + '/api/rgw/user/teuth-test-user/quota', + params={ + 'quota_type': 'user', + 'enabled': 'true', + 'max_size_kb': 2048, + 'max_objects': 101 + }) + self.assertStatus(200) + + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + self.assertEqual(data['user_quota']['max_objects'], 101) + self.assertTrue(data['user_quota']['enabled']) + self.assertEqual(data['user_quota']['max_size_kb'], 2048) + + def test_set_bucket_quota(self): + self._put( + '/api/rgw/user/teuth-test-user/quota', + params={ + 'quota_type': 'bucket', + 'enabled': 'false', + 'max_size_kb': 4096, + 'max_objects': 2000 + }) + self.assertStatus(200) + + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + self.assertEqual(data['bucket_quota']['max_objects'], 2000) + self.assertFalse(data['bucket_quota']['enabled']) + self.assertEqual(data['bucket_quota']['max_size_kb'], 4096) + + +class RgwUserSubuserTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserSubuserTest, cls).setUpClass() + + def test_create_swift(self): + self._post( + '/api/rgw/user/teuth-test-user/subuser', + params={ + 'subuser': 'tux', + 'access': 'readwrite', + 'key_type': 'swift' + }) + self.assertStatus(200) + data = self.jsonBody() + subuser = self.find_object_in_list('id', 'teuth-test-user:tux', data) + self.assertIsInstance(subuser, object) + self.assertEqual(subuser['permissions'], 'read-write') + + # Get the user data to validate the keys. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list('user', 'teuth-test-user:tux', + data['swift_keys']) + self.assertIsInstance(key, object) + + def test_create_s3(self): + self._post( + '/api/rgw/user/teuth-test-user/subuser', + params={ + 'subuser': 'hugo', + 'access': 'write', + 'generate_secret': 'false', + 'access_key': 'yyy', + 'secret_key': 'xxx' + }) + self.assertStatus(200) + data = self.jsonBody() + subuser = self.find_object_in_list('id', 'teuth-test-user:hugo', data) + self.assertIsInstance(subuser, object) + self.assertEqual(subuser['permissions'], 'write') + + # Get the user data to validate the keys. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list('user', 'teuth-test-user:hugo', + data['keys']) + self.assertIsInstance(key, object) + self.assertEqual(key['secret_key'], 'xxx') + + def test_delete_w_purge(self): + self._delete( + '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser2') + self.assertStatus(204) + + # Get the user data to check that the keys don't exist anymore. 
+        data = self.get_rgw_user('teuth-test-user')
+        self.assertStatus(200)
+        key = self.find_object_in_list(
+            'user', 'teuth-test-user:teuth-test-subuser2', data['swift_keys'])
+        self.assertIsNone(key)
+
+    def test_delete_wo_purge(self):
+        self._delete(
+            '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser',
+            params={'purge_keys': 'false'})
+        self.assertStatus(204)
+
+        # Get the user data to check whether the keys still exist.
+        data = self.get_rgw_user('teuth-test-user')
+        self.assertStatus(200)
+        key = self.find_object_in_list(
+            'user', 'teuth-test-user:teuth-test-subuser', data['keys'])
+        self.assertIsInstance(key, object)
diff --git a/qa/tasks/mgr/dashboard/test_role.py b/qa/tasks/mgr/dashboard/test_role.py
new file mode 100644
index 000000000..dbfaea9e4
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_role.py
@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class RoleTest(DashboardTestCase):
+    @classmethod
+    def _create_role(cls, name=None, description=None, scopes_permissions=None):
+        data = {}
+        if name:
+            data['name'] = name
+        if description:
+            data['description'] = description
+        if scopes_permissions:
+            data['scopes_permissions'] = scopes_permissions
+        cls._post('/api/role', data)
+
+    def test_crud_role(self):
+        self._create_role(name='role1',
+                          description='Description 1',
+                          scopes_permissions={'osd': ['read']})
+        self.assertStatus(201)
+        self.assertJsonBody({
+            'name': 'role1',
+            'description': 'Description 1',
+            'scopes_permissions': {'osd': ['read']},
+            'system': False
+        })
+
+        self._get('/api/role/role1')
+        self.assertStatus(200)
+        self.assertJsonBody({
+            'name': 'role1',
+            'description': 'Description 1',
+            'scopes_permissions': {'osd': ['read']},
+            'system': False
+        })
+
+        self._put('/api/role/role1', {
+            'description': 'Description 2',
+            'scopes_permissions': {'osd': ['read', 'update']},
+        })
+        self.assertStatus(200)
+        self.assertJsonBody({
+            'name': 'role1',
+            'description': 'Description 2',
+            'scopes_permissions': {'osd': ['read', 'update']},
+            'system': False
+        })
+
+        self._delete('/api/role/role1')
+        self.assertStatus(204)
+
+    def test_list_roles(self):
+        roles = self._get('/api/role')
+        self.assertStatus(200)
+
+        self.assertGreaterEqual(len(roles), 1)
+        for role in roles:
+            self.assertIn('name', role)
+            self.assertIn('description', role)
+            self.assertIn('scopes_permissions', role)
+            self.assertIn('system', role)
+
+    def test_get_role_does_not_exist(self):
+        self._get('/api/role/role2')
+        self.assertStatus(404)
+
+    def test_create_role_already_exists(self):
+        self._create_role(name='read-only',
+                          description='Description 1',
+                          scopes_permissions={'osd': ['read']})
+        self.assertStatus(400)
+        self.assertError(code='role_already_exists',
+                         component='role')
+
+    def test_create_role_no_name(self):
+        self._create_role(description='Description 1',
+                          scopes_permissions={'osd': ['read']})
+        self.assertStatus(400)
+        self.assertError(code='name_required',
+                         component='role')
+
+    def test_create_role_invalid_scope(self):
+        self._create_role(name='role1',
+                          description='Description 1',
+                          scopes_permissions={'invalid-scope': ['read']})
+        self.assertStatus(400)
+        self.assertError(code='invalid_scope',
+                         component='role')
+
+    def test_create_role_invalid_permission(self):
+        self._create_role(name='role1',
+                          description='Description 1',
+                          scopes_permissions={'osd': ['invalid-permission']})
+        self.assertStatus(400)
+        self.assertError(code='invalid_permission',
+                         component='role')
+
+    def 
test_delete_role_does_not_exist(self): + self._delete('/api/role/role2') + self.assertStatus(404) + + def test_delete_system_role(self): + self._delete('/api/role/read-only') + self.assertStatus(400) + self.assertError(code='cannot_delete_system_role', + component='role') + + def test_delete_role_associated_with_user(self): + self.create_user("user", "user", ['read-only']) + self._create_role(name='role1', + description='Description 1', + scopes_permissions={'user': ['create', 'read', 'update', 'delete']}) + self.assertStatus(201) + self._put('/api/user/user', {'roles': ['role1']}) + self.assertStatus(200) + + self._delete('/api/role/role1') + self.assertStatus(400) + self.assertError(code='role_is_associated_with_user', + component='role') + + self._put('/api/user/user', {'roles': ['administrator']}) + self.assertStatus(200) + self._delete('/api/role/role1') + self.assertStatus(204) + self.delete_user("user") + + def test_update_role_does_not_exist(self): + self._put('/api/role/role2', {}) + self.assertStatus(404) + + def test_update_system_role(self): + self._put('/api/role/read-only', {}) + self.assertStatus(400) + self.assertError(code='cannot_update_system_role', + component='role') + + def test_clone_role(self): + self._post('/api/role/read-only/clone', {'new_name': 'foo'}) + self.assertStatus(201) + self._delete('/api/role/foo') diff --git a/qa/tasks/mgr/dashboard/test_settings.py b/qa/tasks/mgr/dashboard/test_settings.py new file mode 100644 index 000000000..d6ad1e762 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_settings.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JAny, JList, JObj + + +class SettingsTest(DashboardTestCase): + def setUp(self): + super(SettingsTest, self).setUp() + self.settings = self._get('/api/settings') + + def tearDown(self): + self._put( + '/api/settings', + {setting['name']: setting['value'] + for setting in self.settings}) + + def test_list_settings(self): + settings = self._get('/api/settings') + self.assertGreater(len(settings), 10) + self.assertSchema( + settings, + JList( + JObj({ + 'default': JAny(none=False), + 'name': str, + 'type': str, + 'value': JAny(none=False) + }))) + self.assertStatus(200) + + def test_get_setting(self): + setting = self._get('/api/settings/rgw-api-access-key') + self.assertSchema( + setting, + JObj({ + 'default': JAny(none=False), + 'name': str, + 'type': str, + 'value': JAny(none=False) + })) + self.assertStatus(200) + + def test_set_setting(self): + self._put('/api/settings/rgw-api-access-key', {'value': 'foo'}) + self.assertStatus(200) + + value = self._get('/api/settings/rgw-api-access-key')['value'] + self.assertEqual('foo', value) + + def test_bulk_set(self): + self._put('/api/settings', { + 'RGW_API_ACCESS_KEY': 'dummy-key', + 'RGW_API_SECRET_KEY': 'dummy-secret', + }) + self.assertStatus(200) + + access_key = self._get('/api/settings/rgw-api-access-key')['value'] + self.assertStatus(200) + self.assertEqual('dummy-key', access_key) + + secret_key = self._get('/api/settings/rgw-api-secret-key')['value'] + self.assertStatus(200) + self.assertEqual('dummy-secret', secret_key) diff --git a/qa/tasks/mgr/dashboard/test_summary.py b/qa/tasks/mgr/dashboard/test_summary.py new file mode 100644 index 000000000..a31f89146 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_summary.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class SummaryTest(DashboardTestCase): + CEPHFS = True + + def 
test_summary(self): + data = self._get("/api/summary") + self.assertStatus(200) + + self.assertIn('health_status', data) + self.assertIn('mgr_id', data) + self.assertIn('have_mon_connection', data) + self.assertIn('rbd_mirroring', data) + self.assertIn('executing_tasks', data) + self.assertIn('finished_tasks', data) + self.assertIn('version', data) + self.assertIsNotNone(data['health_status']) + self.assertIsNotNone(data['mgr_id']) + self.assertIsNotNone(data['have_mon_connection']) + self.assertEqual(data['rbd_mirroring'], {'errors': 0, 'warnings': 0}) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_summary_permissions(self): + data = self._get("/api/summary") + self.assertStatus(200) + + self.assertIn('health_status', data) + self.assertIn('mgr_id', data) + self.assertIn('have_mon_connection', data) + self.assertNotIn('rbd_mirroring', data) + self.assertIn('executing_tasks', data) + self.assertIn('finished_tasks', data) + self.assertIn('version', data) + self.assertIsNotNone(data['health_status']) + self.assertIsNotNone(data['mgr_id']) + self.assertIsNotNone(data['have_mon_connection']) diff --git a/qa/tasks/mgr/dashboard/test_telemetry.py b/qa/tasks/mgr/dashboard/test_telemetry.py new file mode 100644 index 000000000..65c62c748 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_telemetry.py @@ -0,0 +1,98 @@ +from .helper import DashboardTestCase, JObj + + +class TelemetryTest(DashboardTestCase): + + pre_enabled_status = True + + @classmethod + def setUpClass(cls): + super(TelemetryTest, cls).setUpClass() + data = cls._get('/api/mgr/module/telemetry') + cls.pre_enabled_status = data['enabled'] + + # identify ourselves so we can filter these reports out on the server side + cls._put( + '/api/settings', + { + 'mgr/telemetry/channel_ident': True, + 'mgr/telemetry/organization': 'ceph-qa', + } + ) + + @classmethod + def tearDownClass(cls): + if cls.pre_enabled_status: + cls._enable_module() + else: + cls._disable_module() + super(TelemetryTest, cls).tearDownClass() + + def test_disable_module(self): + self._enable_module() + self._check_telemetry_enabled(True) + self._disable_module() + self._check_telemetry_enabled(False) + + def test_enable_module_correct_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': 'sharing-1-0' + }) + self.assertStatus(200) + self._check_telemetry_enabled(True) + + def test_enable_module_empty_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': '' + }) + self.assertStatus(400) + self.assertError(code='telemetry_enable_license_missing') + self._check_telemetry_enabled(False) + + def test_enable_module_invalid_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': 'invalid-license' + }) + self.assertStatus(400) + self.assertError(code='telemetry_enable_license_missing') + self._check_telemetry_enabled(False) + + def test_get_report(self): + self._enable_module() + data = self._get('/api/telemetry/report') + self.assertStatus(200) + schema = JObj({ + 'report': JObj({}, allow_unknown=True), + 'device_report': JObj({}, allow_unknown=True) + }) + self.assertSchema(data, schema) + + @classmethod + def _enable_module(cls): + cls._put('/api/telemetry', { + 'enable': True, + 'license_name': 'sharing-1-0' + }) + + @classmethod + def _disable_module(cls): + 
cls._put('/api/telemetry', { + 'enable': False + }) + + def _check_telemetry_enabled(self, enabled): + data = self._get('/api/mgr/module/telemetry') + self.assertStatus(200) + self.assertEqual(data['enabled'], enabled) diff --git a/qa/tasks/mgr/dashboard/test_user.py b/qa/tasks/mgr/dashboard/test_user.py new file mode 100644 index 000000000..3a6464f5a --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_user.py @@ -0,0 +1,565 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +import time +from datetime import datetime, timedelta + +from .helper import DashboardTestCase + + +class UserTest(DashboardTestCase): + @classmethod + def setUpClass(cls): + super(UserTest, cls).setUpClass() + cls._ceph_cmd(['dashboard', 'set-pwd-policy-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-length-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-oldpwd-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-username-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-exclusion-list-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-complexity-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-sequential-chars-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-repetitive-chars-enabled', 'true']) + + @classmethod + def tearDownClass(cls): + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-username-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-exclusion-list-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-complexity-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-sequential-chars-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-repetitive-chars-enabled', 'false']) + super(UserTest, cls).tearDownClass() + + @classmethod + def _create_user(cls, username=None, password=None, name=None, email=None, roles=None, + enabled=True, pwd_expiration_date=None, pwd_update_required=False): + data = {} + if username: + data['username'] = username + if password: + data['password'] = password + if name: + data['name'] = name + if email: + data['email'] = email + if roles: + data['roles'] = roles + if pwd_expiration_date: + data['pwdExpirationDate'] = pwd_expiration_date + data['pwdUpdateRequired'] = pwd_update_required + data['enabled'] = enabled + cls._post("/api/user", data) + + @classmethod + def _reset_login_to_admin(cls, username=None): + cls.logout() + if username: + cls.delete_user(username) + cls.login('admin', 'admin') + + def test_crud_user(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator']) + self.assertStatus(201) + user = self.jsonBody() + + self._get('/api/user/user1') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My Name', + 'email': 'my@email.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }) + + self._put('/api/user/user1', { + 'name': 'My New Name', + 'email': 'mynew@email.com', + 'roles': ['block-manager'], + }) + self.assertStatus(200) + user = self.jsonBody() + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My New Name', + 'email': 'mynew@email.com', + 'roles': ['block-manager'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 
'pwdUpdateRequired': False + }) + + self._delete('/api/user/user1') + self.assertStatus(204) + + def test_crd_disabled_user(self): + self._create_user(username='klara', + password='mypassword10#', + name='Klara Musterfrau', + email='klara@musterfrau.com', + roles=['administrator'], + enabled=False) + self.assertStatus(201) + user = self.jsonBody() + + # Restart dashboard module. + self._unload_module('dashboard') + self._load_module('dashboard') + time.sleep(10) + + self._get('/api/user/klara') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'klara', + 'name': 'Klara Musterfrau', + 'email': 'klara@musterfrau.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': False, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }) + + self._delete('/api/user/klara') + self.assertStatus(204) + + def test_list_users(self): + self._get('/api/user') + self.assertStatus(200) + user = self.jsonBody() + self.assertEqual(len(user), 1) + user = user[0] + self.assertJsonBody([{ + 'username': 'admin', + 'name': None, + 'email': None, + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }]) + + def test_create_user_already_exists(self): + self._create_user(username='admin', + password='mypassword10#', + name='administrator', + email='my@email.com', + roles=['administrator']) + self.assertStatus(400) + self.assertError(code='username_already_exists', + component='user') + + def test_create_user_invalid_role(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['invalid-role']) + self.assertStatus(400) + self.assertError(code='role_does_not_exist', + component='user') + + def test_create_user_invalid_chars_in_name(self): + self._create_user(username='userö', + password='mypassword10#', + name='administrator', + email='my@email.com', + roles=['administrator']) + self.assertStatus(400) + self.assertError(code='ceph_type_not_valid', + component='user') + + def test_delete_user_does_not_exist(self): + self._delete('/api/user/user2') + self.assertStatus(404) + + @DashboardTestCase.RunAs('test', 'test', [{'user': ['create', 'read', 'update', 'delete']}]) + def test_delete_current_user(self): + self._delete('/api/user/test') + self.assertStatus(400) + self.assertError(code='cannot_delete_current_user', + component='user') + + @DashboardTestCase.RunAs('test', 'test', [{'user': ['create', 'read', 'update', 'delete']}]) + def test_disable_current_user(self): + self._put('/api/user/test', {'enabled': False}) + self.assertStatus(400) + self.assertError(code='cannot_disable_current_user', + component='user') + + def test_update_user_does_not_exist(self): + self._put('/api/user/user2', {'name': 'My New Name'}) + self.assertStatus(404) + + def test_update_user_invalid_role(self): + self._put('/api/user/admin', {'roles': ['invalid-role']}) + self.assertStatus(400) + self.assertError(code='role_does_not_exist', + component='user') + + def test_change_password_from_other_user(self): + self._post('/api/user/test2/change_password', { + 'old_password': 'abc', + 'new_password': 'xyz' + }) + self.assertStatus(400) + self.assertError(code='invalid_user_context', component='user') + + def test_change_password_old_not_match(self): + self._post('/api/user/admin/change_password', { + 'old_password': 'foo', + 'new_password': 'bar' + }) + self.assertStatus(400) + self.assertError(code='invalid_old_password', component='user') + + 
def test_change_password_as_old_password(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypassword10#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not be the same as the previous one.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_username(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypasstest1@#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain username.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_forbidden_words(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypassOSD01' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain the keyword "OSD".') + self._reset_login_to_admin('test1') + + def test_change_password_contains_sequential_characters(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypass123456!@$' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain sequential characters.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_repetetive_characters(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'aaaaA1@!#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain repetitive characters.') + self._reset_login_to_admin('test1') + + @DashboardTestCase.RunAs('test1', 'mypassword10#', ['read-only'], False) + def test_change_password(self): + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + self.assertStatus(200) + self.logout() + self._post('/api/auth', {'username': 'test1', 'password': 'mypassword10#'}) + self.assertStatus(400) + self.assertError(code='invalid_credentials', component='auth') + + def test_create_user_password_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + 'test1'], + 'mypassword10#', + return_exit_code=True) + self.assertEqual(exitcode, 0) + self.delete_user('test1') + + @DashboardTestCase.RunAs('test2', 'foo_bar_10#', force_password=False, login=False) + def test_change_user_password_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + 'test2'], + 'foo_new-password01#', + return_exit_code=True) + self.assertEqual(exitcode, 0) + + def test_create_user_password_force_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + '--force-password', 'test11'], + 'bar', 
+ return_exit_code=True) + self.assertEqual(exitcode, 0) + self.delete_user('test11') + + @DashboardTestCase.RunAs('test22', 'foo_bar_10#', force_password=False, login=False) + def test_change_user_password_force_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + '--force-password', 'test22'], + 'bar', + return_exit_code=True) + self.assertEqual(exitcode, 0) + + def test_create_user_password_cli_fail(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + 'test3'], + 'foo', + return_exit_code=True) + self.assertNotEqual(exitcode, 0) + + @DashboardTestCase.RunAs('test4', 'x1z_tst+_10#', force_password=False, login=False) + def test_change_user_password_cli_fail(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + 'test4'], + 'bar', + return_exit_code=True) + self.assertNotEqual(exitcode, 0) + + def test_create_user_with_pwd_expiration_date(self): + future_date = datetime.utcnow() + timedelta(days=10) + future_date = int(time.mktime(future_date.timetuple())) + + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_expiration_date=future_date) + self.assertStatus(201) + user = self.jsonBody() + + self._get('/api/user/user1') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My Name', + 'email': 'my@email.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': future_date, + 'pwdUpdateRequired': False + }) + self._delete('/api/user/user1') + + def test_create_with_pwd_expiration_date_not_valid(self): + past_date = datetime.utcnow() - timedelta(days=10) + past_date = int(time.mktime(past_date.timetuple())) + + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_expiration_date=past_date) + self.assertStatus(400) + self.assertError(code='pwd_past_expiration_date', component='user') + + def test_create_with_default_expiration_date(self): + future_date_1 = datetime.utcnow() + timedelta(days=9) + future_date_1 = int(time.mktime(future_date_1.timetuple())) + future_date_2 = datetime.utcnow() + timedelta(days=11) + future_date_2 = int(time.mktime(future_date_2.timetuple())) + + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '10']) + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator']) + self.assertStatus(201) + + user = self._get('/api/user/user1') + self.assertStatus(200) + self.assertIsNotNone(user['pwdExpirationDate']) + self.assertGreater(user['pwdExpirationDate'], future_date_1) + self.assertLess(user['pwdExpirationDate'], future_date_2) + + self._delete('/api/user/user1') + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '0']) + + def test_pwd_expiration_date_update(self): + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '10']) + self.create_user('user1', 'mypassword10#', ['administrator']) + + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + + # Let's wait 1 s to ensure pwd expiration date is not the same + time.sleep(1) + + self.login('user1', 'mypassword10#') + self._post('/api/user/user1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + self.assertStatus(200) + + # Compare password expiration dates. 
+ self._reset_login_to_admin() + user_1_pwd_changed = self._get('/api/user/user1') + self.assertStatus(200) + self.assertLess(user_1['pwdExpirationDate'], user_1_pwd_changed['pwdExpirationDate']) + + # Cleanup + self.delete_user('user1') + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '0']) + + def test_pwd_update_required(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_update_required=True) + self.assertStatus(201) + + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + self.assertEqual(user_1['pwdUpdateRequired'], True) + + self.login('user1', 'mypassword10#') + self.assertStatus(201) + + self._get('/api/osd') + self.assertStatus(403) + self._reset_login_to_admin('user1') + + def test_pwd_update_required_change_pwd(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_update_required=True) + self.assertStatus(201) + + self.login('user1', 'mypassword10#') + self._post('/api/user/user1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + + self.login('user1', 'newpassword01#') + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + self.assertEqual(user_1['pwdUpdateRequired'], False) + self._get('/api/osd') + self.assertStatus(200) + self._reset_login_to_admin('user1') + + def test_validate_password_weak(self): + self._post('/api/user/validate_password', { + 'password': 'mypassword1' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 11, + 'valuation': 'Weak' + }) + + def test_validate_password_ok(self): + self._post('/api/user/validate_password', { + 'password': 'mypassword1!@' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 17, + 'valuation': 'OK' + }) + + def test_validate_password_strong(self): + self._post('/api/user/validate_password', { + 'password': 'testpassword0047!@' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 22, + 'valuation': 'Strong' + }) + + def test_validate_password_very_strong(self): + self._post('/api/user/validate_password', { + 'password': 'testpassword#!$!@$' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 30, + 'valuation': 'Very strong' + }) + + def test_validate_password_fail(self): + self._post('/api/user/validate_password', { + 'password': 'foo' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password is too weak.' + }) + + def test_validate_password_fail_name(self): + self._post('/api/user/validate_password', { + 'password': 'x1zhugo_10', + 'username': 'hugo' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password must not contain username.' + }) + + def test_validate_password_fail_oldpwd(self): + self._post('/api/user/validate_password', { + 'password': 'x1zt-st10', + 'old_password': 'x1zt-st10' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password must not be the same as the previous one.' 
+ }) + + def test_create_user_pwd_update_required(self): + self.create_user('foo', 'bar', cmd_args=['--pwd_update_required']) + self._get('/api/user/foo') + self.assertStatus(200) + self.assertJsonSubset({ + 'username': 'foo', + 'pwdUpdateRequired': True + }) + self.delete_user('foo') diff --git a/qa/tasks/mgr/mgr_test_case.py b/qa/tasks/mgr/mgr_test_case.py new file mode 100644 index 000000000..94a230c8d --- /dev/null +++ b/qa/tasks/mgr/mgr_test_case.py @@ -0,0 +1,228 @@ +import json +import logging + +from unittest import SkipTest + +from teuthology import misc +from tasks.ceph_test_case import CephTestCase + +# TODO move definition of CephCluster away from the CephFS stuff +from tasks.cephfs.filesystem import CephCluster + + +log = logging.getLogger(__name__) + + +class MgrCluster(CephCluster): + def __init__(self, ctx): + super(MgrCluster, self).__init__(ctx) + self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr')) + + if len(self.mgr_ids) == 0: + raise RuntimeError( + "This task requires at least one manager daemon") + + self.mgr_daemons = dict( + [(mgr_id, self._ctx.daemons.get_daemon('mgr', mgr_id)) for mgr_id + in self.mgr_ids]) + + def mgr_stop(self, mgr_id): + self.mgr_daemons[mgr_id].stop() + + def mgr_fail(self, mgr_id): + self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id) + + def mgr_restart(self, mgr_id): + self.mgr_daemons[mgr_id].restart() + + def get_mgr_map(self): + return json.loads( + self.mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty")) + + def get_registered_clients(self, name, mgr_map = None): + if mgr_map is None: + mgr_map = self.get_mgr_map() + for c in mgr_map['active_clients']: + if c['name'] == name: + return c['addrvec'] + return None + + def get_active_id(self): + return self.get_mgr_map()["active_name"] + + def get_standby_ids(self): + return [s['name'] for s in self.get_mgr_map()["standbys"]] + + def set_module_conf(self, module, key, val): + self.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/{0}/{1}".format( + module, key + ), val) + + def set_module_localized_conf(self, module, mgr_id, key, val, force): + cmd = ["config", "set", "mgr", + "/".join(["mgr", module, mgr_id, key]), + val] + if force: + cmd.append("--force") + self.mon_manager.raw_cluster_cmd(*cmd) + + +class MgrTestCase(CephTestCase): + MGRS_REQUIRED = 1 + + @classmethod + def setup_mgrs(cls): + # Stop all the daemons + for daemon in cls.mgr_cluster.mgr_daemons.values(): + daemon.stop() + + for mgr_id in cls.mgr_cluster.mgr_ids: + cls.mgr_cluster.mgr_fail(mgr_id) + + # Unload all non-default plugins + loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "ls", "--format=json-pretty"))['enabled_modules'] + unload_modules = set(loaded) - {"cephadm", "restful"} + + for m in unload_modules: + cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", m) + + # Start all the daemons + for daemon in cls.mgr_cluster.mgr_daemons.values(): + daemon.restart() + + # Wait for an active to come up + cls.wait_until_true(lambda: cls.mgr_cluster.get_active_id() != "", + timeout=20) + + expect_standbys = set(cls.mgr_cluster.mgr_ids) \ + - {cls.mgr_cluster.get_active_id()} + cls.wait_until_true( + lambda: set(cls.mgr_cluster.get_standby_ids()) == expect_standbys, + timeout=20) + + @classmethod + def setUpClass(cls): + # The test runner should have populated this + assert cls.mgr_cluster is not None + + if len(cls.mgr_cluster.mgr_ids) < cls.MGRS_REQUIRED: + raise SkipTest( + "Only have {0} manager daemons, {1} are 
required".format( + len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED)) + + cls.setup_mgrs() + + @classmethod + def _unload_module(cls, module_name): + def is_disabled(): + enabled_modules = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + 'mgr', 'module', 'ls', "--format=json-pretty"))['enabled_modules'] + return module_name not in enabled_modules + + if is_disabled(): + return + + log.debug("Unloading Mgr module %s ...", module_name) + cls.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'module', 'disable', module_name) + cls.wait_until_true(is_disabled, timeout=30) + + @classmethod + def _load_module(cls, module_name): + loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "ls", "--format=json-pretty"))['enabled_modules'] + if module_name in loaded: + # The enable command is idempotent, but our wait for a restart + # isn't, so let's return now if it's already loaded + return + + initial_mgr_map = cls.mgr_cluster.get_mgr_map() + + # check if the the module is configured as an always on module + mgr_daemons = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "metadata")) + + for daemon in mgr_daemons: + if daemon["name"] == initial_mgr_map["active_name"]: + ceph_version = daemon["ceph_release"] + always_on = initial_mgr_map["always_on_modules"].get(ceph_version, []) + if module_name in always_on: + return + + log.debug("Loading Mgr module %s ...", module_name) + initial_gid = initial_mgr_map['active_gid'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "enable", module_name, "--force") + + # Wait for the module to load + def has_restarted(): + mgr_map = cls.mgr_cluster.get_mgr_map() + done = mgr_map['active_gid'] != initial_gid and mgr_map['available'] + if done: + log.debug("Restarted after module load (new active {0}/{1})".format( + mgr_map['active_name'], mgr_map['active_gid'])) + return done + cls.wait_until_true(has_restarted, timeout=30) + + + @classmethod + def _get_uri(cls, service_name): + # Little dict hack so that I can assign into this from + # the get_or_none function + mgr_map = {'x': None} + + def _get_or_none(): + mgr_map['x'] = cls.mgr_cluster.get_mgr_map() + result = mgr_map['x']['services'].get(service_name, None) + return result + + cls.wait_until_true(lambda: _get_or_none() is not None, 30) + + uri = mgr_map['x']['services'][service_name] + + log.debug("Found {0} at {1} (daemon {2}/{3})".format( + service_name, uri, mgr_map['x']['active_name'], + mgr_map['x']['active_gid'])) + + return uri + + @classmethod + def _assign_ports(cls, module_name, config_name, min_port=7789): + """ + To avoid the need to run lots of hosts in teuthology tests to + get different URLs per mgr, we will hand out different ports + to each mgr here. + + This is already taken care of for us when running in a vstart + environment. + """ + # Start handing out ports well above Ceph's range. 
+ assign_port = min_port + + for mgr_id in cls.mgr_cluster.mgr_ids: + cls.mgr_cluster.mgr_stop(mgr_id) + cls.mgr_cluster.mgr_fail(mgr_id) + + for mgr_id in cls.mgr_cluster.mgr_ids: + log.debug("Using port {0} for {1} on mgr.{2}".format( + assign_port, module_name, mgr_id + )) + cls.mgr_cluster.set_module_localized_conf(module_name, mgr_id, + config_name, + str(assign_port), + force=True) + assign_port += 1 + + for mgr_id in cls.mgr_cluster.mgr_ids: + cls.mgr_cluster.mgr_restart(mgr_id) + + def is_available(): + mgr_map = cls.mgr_cluster.get_mgr_map() + done = mgr_map['available'] + if done: + log.debug("Available after assign ports (new active {0}/{1})".format( + mgr_map['active_name'], mgr_map['active_gid'])) + return done + cls.wait_until_true(is_available, timeout=30) diff --git a/qa/tasks/mgr/test_cache.py b/qa/tasks/mgr/test_cache.py new file mode 100644 index 000000000..71131cbc6 --- /dev/null +++ b/qa/tasks/mgr/test_cache.py @@ -0,0 +1,83 @@ +import json + +from .mgr_test_case import MgrTestCase + +class TestCache(MgrTestCase): + + def setUp(self): + super(TestCache, self).setUp() + self.setup_mgrs() + self._load_module("cli_api") + self.ttl = 10 + self.enable_cache(self.ttl) + + def tearDown(self): + self.disable_cache() + + def get_hit_miss_ratio(self): + perf_dump_command = f"daemon mgr.{self.mgr_cluster.get_active_id()} perf dump" + perf_dump_res = self.cluster_cmd(perf_dump_command) + perf_dump = json.loads(perf_dump_res) + h = perf_dump["mgr"]["cache_hit"] + m = perf_dump["mgr"]["cache_miss"] + return int(h), int(m) + + def enable_cache(self, ttl): + set_ttl = f"config set mgr mgr_ttl_cache_expire_seconds {ttl}" + self.cluster_cmd(set_ttl) + + def disable_cache(self): + set_ttl = "config set mgr mgr_ttl_cache_expire_seconds 0" + self.cluster_cmd(set_ttl) + + + def test_init_cache(self): + get_ttl = "config get mgr mgr_ttl_cache_expire_seconds" + res = self.cluster_cmd(get_ttl) + self.assertEquals(int(res), 10) + + def test_health_not_cached(self): + get_health = "mgr api get health" + + h_start, m_start = self.get_hit_miss_ratio() + self.cluster_cmd(get_health) + h, m = self.get_hit_miss_ratio() + + self.assertEquals(h, h_start) + self.assertEquals(m, m_start) + + def test_osdmap(self): + get_osdmap = "mgr api get osd_map" + + # store in cache + self.cluster_cmd(get_osdmap) + # get from cache + res = self.cluster_cmd(get_osdmap) + osd_map = json.loads(res) + self.assertIn("osds", osd_map) + self.assertGreater(len(osd_map["osds"]), 0) + self.assertIn("epoch", osd_map) + + + + def test_hit_miss_ratio(self): + get_osdmap = "mgr api get osd_map" + + hit_start, miss_start = self.get_hit_miss_ratio() + + def wait_miss(): + self.cluster_cmd(get_osdmap) + _, m = self.get_hit_miss_ratio() + return m == miss_start + 1 + + # Miss, add osd_map to cache + self.wait_until_true(wait_miss, self.ttl + 5) + h, m = self.get_hit_miss_ratio() + self.assertEquals(h, hit_start) + self.assertEquals(m, miss_start+1) + + # Hit, get osd_map from cache + self.cluster_cmd(get_osdmap) + h, m = self.get_hit_miss_ratio() + self.assertEquals(h, hit_start+1) + self.assertEquals(m, miss_start+1) diff --git a/qa/tasks/mgr/test_crash.py b/qa/tasks/mgr/test_crash.py new file mode 100644 index 000000000..49191127f --- /dev/null +++ b/qa/tasks/mgr/test_crash.py @@ -0,0 +1,108 @@ +import json +import logging +import datetime + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) +UUID = 'd5775432-0742-44a3-a435-45095e32e6b1' +DATEFMT = '%Y-%m-%d %H:%M:%S.%f' + + +class 
TestCrash(MgrTestCase): + + def setUp(self): + super(TestCrash, self).setUp() + self.setup_mgrs() + self._load_module('crash') + + # Whip up some crash data + self.crashes = dict() + now = datetime.datetime.utcnow() + + for i in (0, 1, 3, 4, 8): + timestamp = now - datetime.timedelta(days=i) + timestamp = timestamp.strftime(DATEFMT) + 'Z' + crash_id = '_'.join((timestamp, UUID)).replace(' ', '_') + self.crashes[crash_id] = { + 'crash_id': crash_id, 'timestamp': timestamp, + } + + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'post', '-i', '-', + stdin=json.dumps(self.crashes[crash_id]), + ) + ) + + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + log.warning("setUp: crash ls returns %s" % retstr) + + self.oldest_crashid = crash_id + + def tearDown(self): + for crash in self.crashes.values(): + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crash['crash_id'] + ) + + def test_info(self): + for crash in self.crashes.values(): + log.warning('test_info: crash %s' % crash) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls' + ) + log.warning('ls output: %s' % retstr) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'info', crash['crash_id'], + ) + log.warning('crash info output: %s' % retstr) + crashinfo = json.loads(retstr) + self.assertIn('crash_id', crashinfo) + self.assertIn('timestamp', crashinfo) + + def test_ls(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + for crash in self.crashes.values(): + self.assertIn(crash['crash_id'], retstr) + + def test_rm(self): + crashid = next(iter(self.crashes.keys())) + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crashid, + ) + ) + + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + self.assertNotIn(crashid, retstr) + + def test_stat(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'stat', + ) + self.assertIn('5 crashes recorded', retstr) + self.assertIn('4 older than 1 days old:', retstr) + self.assertIn('3 older than 3 days old:', retstr) + self.assertIn('1 older than 7 days old:', retstr) + + def test_prune(self): + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'prune', '5' + ) + ) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + self.assertNotIn(self.oldest_crashid, retstr) diff --git a/qa/tasks/mgr/test_dashboard.py b/qa/tasks/mgr/test_dashboard.py new file mode 100644 index 000000000..c3459ec02 --- /dev/null +++ b/qa/tasks/mgr/test_dashboard.py @@ -0,0 +1,177 @@ +import logging +import ssl + +import requests +from requests.adapters import HTTPAdapter + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestDashboard(MgrTestCase): + MGRS_REQUIRED = 3 + + def setUp(self): + super(TestDashboard, self).setUp() + + self._assign_ports("dashboard", "ssl_server_port") + self._load_module("dashboard") + self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard", + "create-self-signed-cert") + + def tearDown(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_behaviour", + "redirect") + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_error_status_code", + "500") + + def wait_until_webserver_available(self, url): + def _check_connection(): + try: + requests.get(url, 
allow_redirects=False, verify=False) + return True + except requests.ConnectionError: + pass + return False + self.wait_until_true(_check_connection, timeout=30) + + def test_standby(self): + # skip this test if mgr_standby_modules=false + if self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "get", "mgr", "mgr_standby_modules").strip() == "false": + log.info("Skipping test_standby since mgr_standby_modules=false") + return + + original_active_id = self.mgr_cluster.get_active_id() + original_uri = self._get_uri("dashboard") + log.info("Originally running manager '{}' at {}".format( + original_active_id, original_uri)) + + # Force a failover and wait until the previously active manager + # is listed as standby. + self.mgr_cluster.mgr_fail(original_active_id) + self.wait_until_true( + lambda: original_active_id in self.mgr_cluster.get_standby_ids(), + timeout=30) + + failed_active_id = self.mgr_cluster.get_active_id() + failed_over_uri = self._get_uri("dashboard") + log.info("After failover running manager '{}' at {}".format( + failed_active_id, failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + + # The original active daemon should have come back up as a standby + # and be doing redirects to the new active daemon. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 303) + self.assertEqual(r.headers['Location'], failed_over_uri) + + # Ensure that every URL redirects to the active daemon. + r = requests.get("{}/runtime.js".format(original_uri.strip('/')), + allow_redirects=False, + verify=False) + self.assertEqual(r.status_code, 303) + self.assertEqual(r.headers['Location'], failed_over_uri) + + def test_standby_disable_redirect(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_behaviour", + "error") + + original_active_id = self.mgr_cluster.get_active_id() + original_uri = self._get_uri("dashboard") + log.info("Originally running manager '{}' at {}".format( + original_active_id, original_uri)) + + # Force a failover and wait until the previously active manager + # is listed as standby. + self.mgr_cluster.mgr_fail(original_active_id) + self.wait_until_true( + lambda: original_active_id in self.mgr_cluster.get_standby_ids(), + timeout=30) + + failed_active_id = self.mgr_cluster.get_active_id() + failed_over_uri = self._get_uri("dashboard") + log.info("After failover running manager '{}' at {}".format( + failed_active_id, failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + + # Redirection should be disabled now, instead a 500 must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 500) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_error_status_code", + "503") + + # The customized HTTP status code (503) must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 503) + + def test_urls(self): + base_uri = self._get_uri("dashboard") + + # This is a very simple smoke test to check that the dashboard can + # give us a 200 response to requests. We're not testing that + # the content is correct or even renders! 
+ + urls = [ + "/", + ] + + failures = [] + + for url in urls: + r = requests.get(base_uri + url, allow_redirects=False, + verify=False) + if r.status_code >= 300 and r.status_code < 400: + log.error("Unexpected redirect to: {0} (from {1})".format( + r.headers['Location'], base_uri)) + if r.status_code != 200: + failures.append(url) + + log.info("{0}: {1} ({2} bytes)".format( + url, r.status_code, len(r.content) + )) + + self.assertListEqual(failures, []) + + def test_tls(self): + class CustomHTTPAdapter(HTTPAdapter): + def __init__(self, ssl_version): + self.ssl_version = ssl_version + super().__init__() + + def init_poolmanager(self, *args, **kwargs): + kwargs['ssl_version'] = self.ssl_version + return super().init_poolmanager(*args, **kwargs) + + uri = self._get_uri("dashboard") + + # TLSv1 + with self.assertRaises(requests.exceptions.SSLError): + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLSv1)) + session.get(uri, allow_redirects=False, verify=False) + + # TLSv1.1 + with self.assertRaises(requests.exceptions.SSLError): + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLSv1_1)) + session.get(uri, allow_redirects=False, verify=False) + + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLS)) + r = session.get(uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 200) diff --git a/qa/tasks/mgr/test_failover.py b/qa/tasks/mgr/test_failover.py new file mode 100644 index 000000000..bfff11262 --- /dev/null +++ b/qa/tasks/mgr/test_failover.py @@ -0,0 +1,182 @@ + +import logging +import json + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestFailover(MgrTestCase): + MGRS_REQUIRED = 2 + + def setUp(self): + super(TestFailover, self).setUp() + self.setup_mgrs() + + def test_timeout(self): + """ + That when an active mgr stops responding, a standby is promoted + after mon_mgr_beacon_grace. + """ + + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + # Stop that daemon + self.mgr_cluster.mgr_stop(original_active) + + # Assert that the other mgr becomes active + self.wait_until_true( + lambda: self.mgr_cluster.get_active_id() in original_standbys, + timeout=60 + ) + + self.mgr_cluster.mgr_restart(original_active) + self.wait_until_true( + lambda: original_active in self.mgr_cluster.get_standby_ids(), + timeout=10 + ) + + def test_timeout_nostandby(self): + """ + That when an active mgr stop responding, and no standby is + available, the active mgr is removed from the map anyway. + """ + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + for s in original_standbys: + self.mgr_cluster.mgr_stop(s) + self.mgr_cluster.mgr_fail(s) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), []) + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + + grace = int(self.mgr_cluster.get_config("mon_mgr_beacon_grace")) + log.info("Should time out in about {0} seconds".format(grace)) + + self.mgr_cluster.mgr_stop(original_active) + + # Now wait for the mon to notice the mgr is gone and remove it + # from the map. 
+ self.wait_until_equal( + lambda: self.mgr_cluster.get_active_id(), + "", + timeout=grace * 2 + ) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), []) + self.assertEqual(self.mgr_cluster.get_active_id(), "") + + def test_explicit_fail(self): + """ + That when a user explicitly fails a daemon, a standby immediately + replaces it. + :return: + """ + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + self.mgr_cluster.mgr_fail(original_active) + + # A standby should take over + self.wait_until_true( + lambda: self.mgr_cluster.get_active_id() in original_standbys, + timeout=60 + ) + + # The one we failed should come back as a standby (he isn't + # really dead) + self.wait_until_true( + lambda: original_active in self.mgr_cluster.get_standby_ids(), + timeout=10 + ) + + # Both daemons should have fully populated metadata + # (regression test for http://tracker.ceph.com/issues/21260) + meta = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "metadata")) + id_to_meta = dict([(i['name'], i) for i in meta]) + for i in [original_active] + original_standbys: + self.assertIn(i, id_to_meta) + self.assertIn('ceph_version', id_to_meta[i]) + + # We should be able to fail back over again: the exercises + # our re-initialization of the python runtime within + # a single process lifetime. + + # Get rid of any bystander standbys so that the original_active + # will be selected as next active. + new_active = self.mgr_cluster.get_active_id() + for daemon in original_standbys: + if daemon != new_active: + self.mgr_cluster.mgr_stop(daemon) + self.mgr_cluster.mgr_fail(daemon) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), + [original_active]) + + self.mgr_cluster.mgr_stop(new_active) + self.mgr_cluster.mgr_fail(new_active) + + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + self.assertEqual(self.mgr_cluster.get_standby_ids(), []) + + def test_standby_timeout(self): + """ + That when a standby daemon stops sending beacons, it is + removed from the list of standbys + :return: + """ + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + victim = original_standbys[0] + self.mgr_cluster.mgr_stop(victim) + + expect_standbys = set(original_standbys) - {victim} + + self.wait_until_true( + lambda: set(self.mgr_cluster.get_standby_ids()) == expect_standbys, + timeout=60 + ) + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + +class TestLibCephSQLiteFailover(MgrTestCase): + MGRS_REQUIRED = 1 + + def setUp(self): + super(TestLibCephSQLiteFailover, self).setUp() + self.setup_mgrs() + + def get_libcephsqlite(self): + mgr_map = self.mgr_cluster.get_mgr_map() + addresses = self.mgr_cluster.get_registered_clients('libcephsqlite', mgr_map=mgr_map) + self.assertEqual(len(addresses), 1) + return addresses[0] + + def test_maybe_reonnect(self): + """ + That the devicehealth module can recover after losing its libcephsqlite lock. 
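The lock is broken by blocklisting the client address registered in the mgr map; the entry handed to 'osd blocklist add' is built roughly like this (field names as used below, values hypothetical):

    client = {"addr": "v2:192.168.0.1:0", "nonce": 1234567}    # hypothetical values
    spec = "{addr}/{nonce}".format(**client)                   # -> "v2:192.168.0.1:0/1234567"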
+ """ + + # make sure the database is populated and loaded by the module + self.mgr_cluster.mon_manager.ceph("device scrape-health-metrics") + + oldaddr = self.get_libcephsqlite() + self.mgr_cluster.mon_manager.ceph(f"osd blocklist add {oldaddr['addr']}/{oldaddr['nonce']}") + + def test(): + self.mgr_cluster.mon_manager.ceph("device scrape-health-metrics") + newaddr = self.get_libcephsqlite() + return oldaddr != newaddr + + self.wait_until_true( + test, + timeout=30 + ) diff --git a/qa/tasks/mgr/test_insights.py b/qa/tasks/mgr/test_insights.py new file mode 100644 index 000000000..aa2548881 --- /dev/null +++ b/qa/tasks/mgr/test_insights.py @@ -0,0 +1,192 @@ +import logging +import json +import datetime +import time + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) +UUID = 'd5775432-0742-44a3-a435-45095e32e6b2' +DATEFMT = '%Y-%m-%d %H:%M:%S.%f' + +class TestInsights(MgrTestCase): + def setUp(self): + super(TestInsights, self).setUp() + self.setup_mgrs() + self._load_module("insights") + self._load_module("selftest") + self.crash_ids = [] + + def tearDown(self): + self._clear_crashes() + + def _insights(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd("insights") + return json.loads(retstr) + + def _add_crash(self, hours, make_invalid = False): + now = datetime.datetime.utcnow() + timestamp = now - datetime.timedelta(hours = hours) + timestamp = timestamp.strftime(DATEFMT) + 'Z' + crash_id = '_'.join((timestamp, UUID)).replace(' ', '_') + crash = { + 'crash_id': crash_id, + 'timestamp': timestamp, + } + if make_invalid: + crash["timestamp"] = "not a timestamp" + + ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'post', '-i', '-', + stdin=json.dumps(crash) + ) + self.crash_ids.append(crash_id) + self.assertEqual(0, ret) + + def _clear_crashes(self): + for crash_id in self.crash_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crash_id + ) + + def _wait_for_health_history_checks(self, *args): + """Wait for a set of health checks to appear in the health history""" + timeout = datetime.datetime.utcnow() + \ + datetime.timedelta(seconds = 15) + while True: + report = self._insights() + missing = False + for check in args: + if check not in report["health"]["history"]["checks"]: + missing = True + break + if not missing: + return + self.assertGreater(timeout, + datetime.datetime.utcnow()) + time.sleep(0.25) + + def _wait_for_curr_health_cleared(self, check): + timeout = datetime.datetime.utcnow() + \ + datetime.timedelta(seconds = 15) + while True: + report = self._insights() + if check not in report["health"]["current"]["checks"]: + return + self.assertGreater(timeout, + datetime.datetime.utcnow()) + time.sleep(0.25) + + def test_health_history(self): + # use empty health history as starting point + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "insights", "prune-health", "0") + report = self._insights() + self.assertFalse(report["health"]["history"]["checks"]) + + # generate health check history entries. we want to avoid the edge case + # of running these tests at _exactly_ the top of the hour so we can + # explicitly control when hourly work occurs. for this we use the + # current time offset to a half hour. + now = datetime.datetime.utcnow() + now = datetime.datetime( + year = now.year, + month = now.month, + day = now.day, + hour = now.hour, + minute = 30) + + check_names = set() + for hours in [-18, -11, -5, -1, 0]: + # change the insight module's perception of "now" ... 
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "insights_set_now_offset", str(hours)) + + # ... to simulate health check arrivals in the past + unique_check_name = "insights_health_check_{}".format(hours) + health_check = { + unique_check_name: { + "severity": "warning", + "summary": "summary", + "detail": ["detail"] + } + } + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "set", + json.dumps(health_check)) + + check_names.add(unique_check_name) + + # and also set the same health check to test deduplication + dupe_check_name = "insights_health_check" + health_check = { + dupe_check_name: { + "severity": "warning", + "summary": "summary", + "detail": ["detail"] + } + } + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "set", + json.dumps(health_check)) + + check_names.add(dupe_check_name) + + # wait for the health check to show up in the history report + self._wait_for_health_history_checks(unique_check_name, dupe_check_name) + + # clear out the current health checks before moving on + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "clear") + self._wait_for_curr_health_cleared(unique_check_name) + + report = self._insights() + for check in check_names: + self.assertIn(check, report["health"]["history"]["checks"]) + + # restart the manager + active_id = self.mgr_cluster.get_active_id() + self.mgr_cluster.mgr_restart(active_id) + + # pruning really removes history + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "insights", "prune-health", "0") + report = self._insights() + self.assertFalse(report["health"]["history"]["checks"]) + + def test_schema(self): + """TODO: assert conformance to a full schema specification?""" + report = self._insights() + for key in ["osd_metadata", + "pg_summary", + "mon_status", + "manager_map", + "service_map", + "mon_map", + "crush_map", + "fs_map", + "osd_tree", + "df", + "osd_dump", + "config", + "health", + "crashes", + "version", + "errors"]: + self.assertIn(key, report) + + def test_crash_history(self): + self._clear_crashes() + report = self._insights() + self.assertFalse(report["crashes"]["summary"]) + self.assertFalse(report["errors"]) + + # crashes show up in the report + self._add_crash(1) + report = self._insights() + self.assertTrue(report["crashes"]["summary"]) + self.assertFalse(report["errors"]) + log.warning("{}".format(json.dumps(report["crashes"], indent=2))) + + self._clear_crashes() diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py new file mode 100644 index 000000000..7ac296037 --- /dev/null +++ b/qa/tasks/mgr/test_module_selftest.py @@ -0,0 +1,254 @@ + +import time +import requests +import errno +import logging + +from teuthology.exceptions import CommandFailedError + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestModuleSelftest(MgrTestCase): + """ + That modules with a self-test command can be loaded and execute it + without errors. + + This is not a substitute for really testing the modules, but it + is quick and is designed to catch regressions that could occur + if data structures change in a way that breaks how the modules + touch them. 
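Each module is driven through the same entry point; running, say, the telemetry module's self test boils down to (illustrative):

    self.mgr_cluster.mon_manager.raw_cluster_cmd(
        "mgr", "self-test", "module", "telemetry")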
+ """ + MGRS_REQUIRED = 1 + + def setUp(self): + super(TestModuleSelftest, self).setUp() + self.setup_mgrs() + + def _selftest_plugin(self, module_name): + self._load_module("selftest") + self._load_module(module_name) + + # Execute the module's self_test() method + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "module", module_name) + + def test_zabbix(self): + # Set these mandatory config fields so that the zabbix module + # won't trigger health/log errors on load/serve. + self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost") + self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo") + self._selftest_plugin("zabbix") + + def test_prometheus(self): + self._assign_ports("prometheus", "server_port", min_port=8100) + self._selftest_plugin("prometheus") + + def test_influx(self): + self._selftest_plugin("influx") + + def test_diskprediction_local(self): + self._load_module("selftest") + python_version = self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "python-version") + if tuple(int(v) for v in python_version.split('.')) == (3, 8): + # https://tracker.ceph.com/issues/45147 + self.skipTest(f'python {python_version} not compatible with ' + 'diskprediction_local') + self._selftest_plugin("diskprediction_local") + + def test_telegraf(self): + self._selftest_plugin("telegraf") + + def test_iostat(self): + self._selftest_plugin("iostat") + + def test_devicehealth(self): + self._selftest_plugin("devicehealth") + + def test_selftest_run(self): + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run") + + def test_telemetry(self): + self._selftest_plugin("telemetry") + + def test_crash(self): + self._selftest_plugin("crash") + + def test_orchestrator(self): + self._selftest_plugin("orchestrator") + + + def test_selftest_config_update(self): + """ + That configuration updates are seen by running mgr modules + """ + self._load_module("selftest") + + def get_value(): + return self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get", "testkey").strip() + + self.assertEqual(get_value(), "None") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "set", "mgr", "mgr/selftest/testkey", "foo") + self.wait_until_equal(get_value, "foo", timeout=10) + + def get_localized_value(): + return self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get_localized", "testkey").strip() + + self.assertEqual(get_localized_value(), "foo") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "set", "mgr", "mgr/selftest/{}/testkey".format( + self.mgr_cluster.get_active_id()), + "bar") + self.wait_until_equal(get_localized_value, "bar", timeout=10) + + + def test_selftest_command_spam(self): + # Use the selftest module to stress the mgr daemon + self._load_module("selftest") + + # Use the dashboard to test that the mgr is still able to do its job + self._assign_ports("dashboard", "ssl_server_port") + self._load_module("dashboard") + self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard", + "create-self-signed-cert") + + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", + "background", "start", + "command_spam") + + dashboard_uri = self._get_uri("dashboard") + + delay = 10 + periods = 10 + for i in range(0, periods): + t1 = time.time() + # Check that an HTTP module remains responsive + r = requests.get(dashboard_uri, 
verify=False) + self.assertEqual(r.status_code, 200) + + # Check that a native non-module command remains responsive + self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df") + + time.sleep(delay - (time.time() - t1)) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", + "background", "stop") + + # Check that all mgr daemons are still running + self.assertEqual(original_active, self.mgr_cluster.get_active_id()) + self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids()) + + def test_module_commands(self): + """ + That module-handled commands have appropriate behavior on + disabled/failed/recently-enabled modules. + """ + + # Calling a command on a disabled module should return the proper + # error code. + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", "selftest") + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "run") + + self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP) + + # Calling a command that really doesn't exist should give me EINVAL. + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "osd", "albatross") + + self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL) + + # Enabling a module and then immediately using ones of its commands + # should work (#21683) + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get", "testkey") + + # Calling a command for a failed module should return the proper + # error code. + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "background", "start", "throw_exception") + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "run" + ) + self.assertEqual(exc_raised.exception.exitstatus, errno.EIO) + + # A health alert should be raised for a module that has thrown + # an exception from its serve() method + self.wait_for_health( + "Module 'selftest' has failed: Synthetic exception in serve", + timeout=30) + # prune the crash reports, so that the health report is back to + # clean + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "crash", "prune", "0") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", "selftest") + + self.wait_for_health_clear(timeout=30) + + def test_module_remote(self): + """ + Use the selftest module to exercise inter-module communication + """ + self._load_module("selftest") + # The "self-test remote" operation just happens to call into + # influx. + self._load_module("influx") + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "remote") + + def test_selftest_cluster_log(self): + """ + Use the selftest module to test the cluster/audit log interface. 
+ """ + priority_map = { + "info": "INF", + "security": "SEC", + "warning": "WRN", + "error": "ERR" + } + self._load_module("selftest") + for priority in priority_map.keys(): + message = "foo bar {}".format(priority) + log_message = "[{}] {}".format(priority_map[priority], message) + # Check for cluster/audit logs: + # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info + # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security + # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning + # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error + with self.assert_cluster_log(log_message): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "cluster", + priority, message) + with self.assert_cluster_log(log_message, watch_channel="audit"): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "audit", + priority, message) + + def test_selftest_cluster_log_unknown_channel(self): + """ + Use the selftest module to test the cluster/audit log interface. + """ + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "xyz", + "ERR", "The channel does not exist") + self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP) diff --git a/qa/tasks/mgr/test_orchestrator_cli.py b/qa/tasks/mgr/test_orchestrator_cli.py new file mode 100644 index 000000000..3fccef9a6 --- /dev/null +++ b/qa/tasks/mgr/test_orchestrator_cli.py @@ -0,0 +1,250 @@ +import errno +import json +import logging + + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestOrchestratorCli(MgrTestCase): + MGRS_REQUIRED = 1 + + def _cmd(self, module, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(module, *args) + + def _orch_cmd(self, *args): + return self._cmd("orch", *args) + + def _progress_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args) + + def _orch_cmd_result(self, *args, **kwargs): + """ + raw_cluster_cmd doesn't support kwargs. 
+ """ + return self.mgr_cluster.mon_manager.raw_cluster_cmd_result("orch", *args, **kwargs) + + def _test_orchestrator_cmd_result(self, *args, **kwargs): + return self.mgr_cluster.mon_manager.raw_cluster_cmd_result("test_orchestrator", *args, **kwargs) + + def setUp(self): + super(TestOrchestratorCli, self).setUp() + + self._load_module("orchestrator") + self._load_module("test_orchestrator") + self._orch_cmd("set", "backend", "test_orchestrator") + + def test_status(self): + ret = self._orch_cmd("status") + self.assertIn("test_orchestrator", ret) + + def test_device_ls(self): + ret = self._orch_cmd("device", "ls") + self.assertIn("localhost", ret) + + def test_device_ls_refresh(self): + ret = self._orch_cmd("device", "ls", "--refresh") + self.assertIn("localhost", ret) + + def test_device_ls_hoshs(self): + ret = self._orch_cmd("device", "ls", "localhost", "host1") + self.assertIn("localhost", ret) + + + def test_device_ls_json(self): + ret = self._orch_cmd("device", "ls", "--format", "json") + self.assertIn("localhost", ret) + self.assertIsInstance(json.loads(ret), list) + + def test_ps(self): + ret = self._orch_cmd("ps") + self.assertIn("mgr", ret) + + def test_ps_json(self): + ret = self._orch_cmd("ps", "--format", "json") + self.assertIsInstance(json.loads(ret), list) + self.assertIn("mgr", ret) + + + def test_service_action(self): + self._orch_cmd("restart", "mds.cephfs") + self._orch_cmd("stop", "mds.cephfs") + self._orch_cmd("start", "mds.cephfs") + + def test_service_instance_action(self): + self._orch_cmd("daemon", "restart", "mds.a") + self._orch_cmd("daemon", "stop", "mds.a") + self._orch_cmd("daemon", "start", "mds.a") + + def test_osd_create(self): + drive_group = """ +service_type: osd +service_id: any.sda +placement: + host_pattern: '*' +data_devices: + all: True +""" + res = self._orch_cmd_result("apply", "osd", "-i", "-", + stdin=drive_group) + self.assertEqual(res, 0) + + def test_blink_device_light(self): + def _ls_lights(what): + return json.loads(self._cmd("device", "ls-lights"))[what] + + metadata = json.loads(self._cmd("osd", "metadata")) + dev_name_ids = [osd["device_ids"] for osd in metadata] + _, dev_id = [d.split('=') for d in dev_name_ids if len(d.split('=')) == 2][0] + + for t in ["ident", "fault"]: + self.assertNotIn(dev_id, _ls_lights(t)) + self._cmd("device", "light", "on", dev_id, t) + self.assertIn(dev_id, _ls_lights(t)) + + health = { + 'ident': 'DEVICE_IDENT_ON', + 'fault': 'DEVICE_FAULT_ON', + }[t] + self.wait_for_health(health, 30) + + self._cmd("device", "light", "off", dev_id, t) + self.assertNotIn(dev_id, _ls_lights(t)) + + self.wait_for_health_clear(30) + + def test_mds_add(self): + self._orch_cmd('daemon', 'add', 'mds', 'fsname') + + def test_rgw_add(self): + self._orch_cmd('daemon', 'add', 'rgw', 'realm', 'zone') + + def test_nfs_add(self): + self._orch_cmd('daemon', 'add', "nfs", "service_name") + + def test_osd_rm(self): + self._orch_cmd('daemon', "rm", "osd.0", '--force') + + def test_mds_rm(self): + self._orch_cmd("daemon", "rm", "mds.fsname") + + def test_rgw_rm(self): + self._orch_cmd("daemon", "rm", "rgw.myrealm.myzone") + + def test_nfs_rm(self): + self._orch_cmd("daemon", "rm", "nfs.service_name") + + def test_host_ls(self): + out = self._orch_cmd("host", "ls", "--format=json") + hosts = json.loads(out) + self.assertEqual(len(hosts), 1) + self.assertEqual(hosts[0]["hostname"], "localhost") + + def test_host_add(self): + self._orch_cmd("host", "add", "hostname") + + def test_host_rm(self): + self._orch_cmd("host", "rm", "hostname") + + 
def test_mon_update(self): + self._orch_cmd("apply", "mon", "3 host1:1.2.3.0/24 host2:1.2.3.0/24 host3:10.0.0.0/8") + self._orch_cmd("apply", "mon", "3 host1:1.2.3.4 host2:1.2.3.4 host3:10.0.0.1") + + def test_mgr_update(self): + self._orch_cmd("apply", "mgr", "3") + + def test_nfs_update(self): + self._orch_cmd("apply", "nfs", "service_name", "2") + + def test_error(self): + ret = self._orch_cmd_result("host", "add", "raise_validation_error") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_error") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_bug") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_not_implemented") + self.assertEqual(ret, errno.ENOENT) + ret = self._orch_cmd_result("host", "add", "raise_no_orchestrator") + self.assertEqual(ret, errno.ENOENT) + ret = self._orch_cmd_result("host", "add", "raise_import_error") + self.assertEqual(ret, errno.ENOENT) + + def test_load_data(self): + data = { + 'inventory': [ + { + 'name': 'host0', + 'devices': [ + { + 'type': 'hdd', + 'id': '/dev/sda', + 'size': 1024**4 * 4, + 'rotates': True + } + ] + }, + { + 'name': 'host1', + 'devices': [ + { + 'type': 'hdd', + 'id': '/dev/sda', + 'size': 1024**4 * 4, + 'rotates': True + } + ] + } + ], + 'daemons': [ + { + 'hostname': 'host0', + 'daemon_type': 'mon', + 'daemon_id': 'a' + }, + { + 'hostname': 'host1', + 'daemon_type': 'osd', + 'daemon_id': '1' + } + ] + } + + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json.dumps(data)) + self.assertEqual(ret, 0) + out = self._orch_cmd('device', 'ls', '--format=json') + inventory = data['inventory'] + inventory_result = json.loads(out) + self.assertEqual(len(inventory), len(inventory_result)) + + out = self._orch_cmd('device', 'ls', 'host0', '--format=json') + inventory_result = json.loads(out) + self.assertEqual(len(inventory_result), 1) + self.assertEqual(inventory_result[0]['name'], 'host0') + + out = self._orch_cmd('ps', '--format=json') + daemons = data['daemons'] + daemons_result = json.loads(out) + self.assertEqual(len(daemons), len(daemons_result)) + + out = self._orch_cmd('ps', 'host0', '--format=json') + daemons_result = json.loads(out) + self.assertEqual(len(daemons_result), 1) + self.assertEqual(daemons_result[0]['hostname'], 'host0') + + # test invalid input file: invalid json + json_str = '{ "inventory: ' + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json_str) + self.assertEqual(ret, errno.EINVAL) + + # test invalid input file: missing key + json_str = '{ "inventory": [{"devices": []}] }' + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json_str) + self.assertEqual(ret, errno.EINVAL) + + # load empty data for other tests + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin='{}') + self.assertEqual(ret, 0) diff --git a/qa/tasks/mgr/test_progress.py b/qa/tasks/mgr/test_progress.py new file mode 100644 index 000000000..a80600c6a --- /dev/null +++ b/qa/tasks/mgr/test_progress.py @@ -0,0 +1,423 @@ + +import json +import logging +import time +from .mgr_test_case import MgrTestCase +from contextlib import contextmanager + +log = logging.getLogger(__name__) + + +class TestProgress(MgrTestCase): + POOL = "progress_data" + + # How long we expect to wait at most between taking an OSD out + # and seeing the progress event pop up. 
+ EVENT_CREATION_PERIOD = 60 + + WRITE_PERIOD = 30 + + # Generous period for OSD recovery, should be same order of magnitude + # to how long it took to write the data to begin with + RECOVERY_PERIOD = WRITE_PERIOD * 4 + + def _get_progress(self): + out = self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "json") + return json.loads(out) + + def _all_events(self): + """ + To avoid racing on completion, we almost always want to look + for events in the total list of active and complete, so + munge them into a single list. + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['events'] + p['completed'] + + def _events_in_progress(self): + """ + this function returns all events that are in progress + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['events'] + + def _completed_events(self): + """ + This function returns all events that are completed + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['completed'] + + def is_osd_marked_out(self, ev): + return ev['message'].endswith('marked out') + + def is_osd_marked_in(self, ev): + return ev['message'].endswith('marked in') + + def _get_osd_in_out_events(self, marked='both'): + """ + Return the event that deals with OSDs being + marked in, out or both + """ + + marked_in_events = [] + marked_out_events = [] + + events_in_progress = self._events_in_progress() + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_events.append(ev) + elif self.is_osd_marked_in(ev): + marked_in_events.append(ev) + + if marked == 'both': + return [marked_in_events] + [marked_out_events] + elif marked == 'in': + return marked_in_events + else: + return marked_out_events + + def _osd_in_out_events_count(self, marked='both'): + """ + Count the number of on going recovery events that deals with + OSDs being marked in, out or both. + """ + events_in_progress = self._events_in_progress() + marked_in_count = 0 + marked_out_count = 0 + + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_count += 1 + elif self.is_osd_marked_in(ev): + marked_in_count += 1 + + if marked == 'both': + return marked_in_count + marked_out_count + elif marked == 'in': + return marked_in_count + else: + return marked_out_count + + def _setup_pool(self, size=None): + self.mgr_cluster.mon_manager.create_pool(self.POOL) + if size is not None: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'set', self.POOL, 'size', str(size)) + + def _osd_in_out_completed_events_count(self, marked='both'): + """ + Count the number of completed recovery events that deals with + OSDs being marked in, out, or both. + """ + + completed_events = self._completed_events() + marked_in_count = 0 + marked_out_count = 0 + + for ev in completed_events: + if self.is_osd_marked_out(ev): + marked_out_count += 1 + elif self.is_osd_marked_in(ev): + marked_in_count += 1 + + if marked == 'both': + return marked_in_count + marked_out_count + elif marked == 'in': + return marked_in_count + else: + return marked_out_count + + def _write_some_data(self, t): + """ + To adapt to test systems of varying performance, we write + data for a defined time period, rather than to a defined + capacity. This will hopefully result in a similar timescale + for PG recovery after an OSD failure. 
+ """ + + args = [ + "rados", "-p", self.POOL, "bench", str(t), "write", "-t", "16"] + + self.mgr_cluster.admin_remote.run(args=args, wait=True) + + def _osd_count(self): + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + return len(osd_map['osds']) + + @contextmanager + def recovery_backfill_disabled(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'set', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'set', 'norecover') + yield + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'norecover') + + def setUp(self): + super(TestProgress, self).setUp() + # Ensure we have at least four OSDs + if self._osd_count() < 4: + self.skipTest("Not enough OSDS!") + + # Remove any filesystems so that we can remove their pools + if self.mds_cluster: + self.mds_cluster.mds_stop() + self.mds_cluster.mds_fail() + self.mds_cluster.delete_all_filesystems() + + # Remove all other pools + for pool in self.mgr_cluster.mon_manager.get_osd_dump_json()['pools']: + self.mgr_cluster.mon_manager.remove_pool(pool['pool_name']) + + self._load_module("progress") + self.mgr_cluster.mon_manager.raw_cluster_cmd('progress', 'clear') + + def _simulate_failure(self, osd_ids=None): + """ + Common lead-in to several tests: get some data in the cluster, + then mark an OSD out to trigger the start of a progress event. + + Return the JSON representation of the failure event. + """ + + if osd_ids is None: + osd_ids = [0] + + self._setup_pool() + self._write_some_data(self.WRITE_PERIOD) + with self.recovery_backfill_disabled(): + for osd_id in osd_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', str(osd_id)) + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + + ev = self._get_osd_in_out_events('out')[0] + log.info(json.dumps(ev, indent=1)) + self.assertIn("Rebalancing after osd.0 marked out", ev['message']) + return ev + + def _simulate_back_in(self, osd_ids, initial_event): + for osd_id in osd_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', str(osd_id)) + + # First Event should complete promptly + self.wait_until_true(lambda: self._is_complete(initial_event['id']), + timeout=self.RECOVERY_PERIOD) + + with self.recovery_backfill_disabled(): + + try: + # Wait for progress event marked in to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + except RuntimeError as ex: + if not "Timed out after" in str(ex): + raise ex + + log.info("There was no PGs affected by osd being marked in") + return None + + new_event = self._get_osd_in_out_events('in')[0] + return new_event + + def _no_events_anywhere(self): + """ + Whether there are any live or completed events + """ + p = self._get_progress() + total_events = len(p['events']) + len(p['completed']) + return total_events == 0 + + def _is_quiet(self): + """ + Whether any progress events are live. 
+ """ + return len(self._get_progress()['events']) == 0 + + def _is_complete(self, ev_id): + progress = self._get_progress() + live_ids = [ev['id'] for ev in progress['events']] + complete_ids = [ev['id'] for ev in progress['completed']] + if ev_id in complete_ids: + assert ev_id not in live_ids + return True + else: + assert ev_id in live_ids + return False + + def _is_inprogress_or_complete(self, ev_id): + for ev in self._events_in_progress(): + if ev['id'] == ev_id: + return ev['progress'] > 0 + # check if the event completed + return self._is_complete(ev_id) + + def tearDown(self): + if self.POOL in self.mgr_cluster.mon_manager.pools: + self.mgr_cluster.mon_manager.remove_pool(self.POOL) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'norecover') + + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + for osd in osd_map['osds']: + if osd['weight'] == 0.0: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', str(osd['osd'])) + + # Unset allow_pg_recovery_event in case it's set to true + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'false') + + super(TestProgress, self).tearDown() + + def test_osd_healthy_recovery(self): + """ + The simple recovery case: an OSD goes down, its PGs get a new + placement, and we wait for the PG to get healthy in its new + locations. + """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + ev = self._simulate_failure() + + # Wait for progress event to ultimately reach completion + self.wait_until_true(lambda: self._is_complete(ev['id']), + timeout=self.RECOVERY_PERIOD) + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_pool_removal(self): + """ + That a pool removed during OSD recovery causes the + progress event to be correctly marked complete once there + is no more data to move. + """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + ev = self._simulate_failure() + + self.mgr_cluster.mon_manager.remove_pool(self.POOL) + + # Event should complete promptly + self.wait_until_true(lambda: self._is_complete(ev['id']), + timeout=self.RECOVERY_PERIOD) + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_osd_came_back(self): + """ + When a recovery is underway, but then the out OSD + comes back in, such that recovery is no longer necessary. + It should create another event for when osd is marked in + and cancel the one that is still ongoing. + """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + ev1 = self._simulate_failure() + + ev2 = self._simulate_back_in([0], ev1) + + if ev2 is not None: + # Wait for progress event to ultimately complete + self.wait_until_true(lambda: self._is_complete(ev2['id']), + timeout=self.RECOVERY_PERIOD) + + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_turn_off_module(self): + """ + When the the module is turned off, there should not + be any on going events or completed events. + Also module should not accept any kind of Remote Event + coming in from other module, however, once it is turned + back, on creating an event should be working as it is. 
+ """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + pool_size = 3 + self._setup_pool(size=pool_size) + self._write_some_data(self.WRITE_PERIOD) + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off") + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + self.assertTrue(self._no_events_anywhere()) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on") + + self._write_some_data(self.WRITE_PERIOD) + + with self.recovery_backfill_disabled(): + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + + ev1 = self._get_osd_in_out_events('out')[0] + + log.info(json.dumps(ev1, indent=1)) + + self.wait_until_true(lambda: self._is_complete(ev1['id']), + check_fn=lambda: self._is_inprogress_or_complete(ev1['id']), + timeout=self.RECOVERY_PERIOD) + self.assertTrue(self._is_quiet()) + + def test_default_progress_test(self): + """ + progress module disabled the event of pg recovery event + by default, we test this to see if this holds true + """ + pool_size = 3 + self._setup_pool(size=pool_size) + self._write_some_data(self.WRITE_PERIOD) + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + self.assertEqual(self._osd_in_out_events_count(), 0) diff --git a/qa/tasks/mgr/test_prometheus.py b/qa/tasks/mgr/test_prometheus.py new file mode 100644 index 000000000..376556ab3 --- /dev/null +++ b/qa/tasks/mgr/test_prometheus.py @@ -0,0 +1,79 @@ +import json +import logging +import requests + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestPrometheus(MgrTestCase): + MGRS_REQUIRED = 3 + + def setUp(self): + super(TestPrometheus, self).setUp() + self.setup_mgrs() + + def test_file_sd_command(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + result = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "prometheus", "file_sd_config")) + mgr_map = self.mgr_cluster.get_mgr_map() + self.assertEqual(len(result[0]['targets']), len(mgr_map['standbys']) + 1) + + + + def test_standby(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + original_active = self.mgr_cluster.get_active_id() + + original_uri = self._get_uri("prometheus") + log.info("Originally running at {0}".format(original_uri)) + + self.mgr_cluster.mgr_fail(original_active) + + failed_over_uri = self._get_uri("prometheus") + log.info("After failover running at {0}".format(failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # The original active daemon should have come back up as a standby + # and serve some html under "/" and an empty answer under /metrics + r = requests.get(original_uri, allow_redirects=False) + self.assertEqual(r.status_code, 200) + r = requests.get(original_uri + "metrics", 
allow_redirects=False) + self.assertEqual(r.status_code, 200) + self.assertEqual(r.headers["content-type"], "text/plain;charset=utf-8") + self.assertEqual(r.headers["server"], "Ceph-Prometheus") + + def test_urls(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + base_uri = self._get_uri("prometheus") + + # This is a very simple smoke test to check that the module can + # give us a 200 response to requests. We're not testing that + # the content is correct or even renders! + + urls = [ + "/", + "/metrics" + ] + + failures = [] + + for url in urls: + r = requests.get(base_uri + url, allow_redirects=False) + if r.status_code != 200: + failures.append(url) + + log.info("{0}: {1} ({2} bytes)".format( + url, r.status_code, len(r.content) + )) + + self.assertListEqual(failures, []) diff --git a/qa/tasks/mon_clock_skew_check.py b/qa/tasks/mon_clock_skew_check.py new file mode 100644 index 000000000..59d4169d1 --- /dev/null +++ b/qa/tasks/mon_clock_skew_check.py @@ -0,0 +1,73 @@ +""" +Handle clock skews in monitors. +""" +import logging +import time +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +class ClockSkewCheck: + """ + Check if there are any clock skews among the monitors in the + quorum. + + This task accepts the following options: + + interval amount of seconds to wait before check. (default: 30.0) + expect-skew 'true' or 'false', to indicate whether to expect a skew during + the run or not. If 'true', the test will fail if no skew is + found, and succeed if a skew is indeed found; if 'false', it's + the other way around. (default: false) + + - mon_clock_skew_check: + expect-skew: true + """ + + def __init__(self, ctx, manager, config, logger): + self.ctx = ctx + self.manager = manager + + self.stopping = False + self.logger = logger + self.config = config + + if self.config is None: + self.config = dict() + + +def task(ctx, config): + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mon_clock_skew_check task only accepts a dict for configuration' + interval = float(config.get('interval', 30.0)) + expect_skew = config.get('expect-skew', False) + + log.info('Beginning mon_clock_skew_check...') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + quorum_size = len(teuthology.get_mon_names(ctx)) + manager.wait_for_mon_quorum_size(quorum_size) + + # wait a bit + log.info('sleeping for {s} seconds'.format( + s=interval)) + time.sleep(interval) + + health = manager.get_mon_health(True) + log.info('got health %s' % health) + if expect_skew: + if 'MON_CLOCK_SKEW' not in health['checks']: + raise RuntimeError('expected MON_CLOCK_SKEW but got none') + else: + if 'MON_CLOCK_SKEW' in health['checks']: + raise RuntimeError('got MON_CLOCK_SKEW but expected none') + diff --git a/qa/tasks/mon_recovery.py b/qa/tasks/mon_recovery.py new file mode 100644 index 000000000..fa7aa1a8d --- /dev/null +++ b/qa/tasks/mon_recovery.py @@ -0,0 +1,80 @@ +""" +Monitor recovery +""" +import logging +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test monitor recovery. 
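A typical invocation needs no options (illustrative yaml fragment, in the same style as the other task docstrings):

    tasks:
    - ceph:
    - mon_recovery: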
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)] + log.info("mon ids = %s" % mons) + + manager.wait_for_mon_quorum_size(len(mons)) + + log.info('verifying all monitors are in the quorum') + for m in mons: + s = manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + log.info('restarting each monitor in turn') + for m in mons: + # stop a monitor + manager.kill_mon(m) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + # restart + manager.revive_mon(m) + manager.wait_for_mon_quorum_size(len(mons)) + + # in forward and reverse order, + rmons = mons + rmons.reverse() + for mons in mons, rmons: + log.info('stopping all monitors') + for m in mons: + manager.kill_mon(m) + + log.info('forming a minimal quorum for %s, then adding monitors' % mons) + qnum = (len(mons) // 2) + 1 + num = 0 + for m in mons: + manager.revive_mon(m) + num += 1 + if num >= qnum: + manager.wait_for_mon_quorum_size(num) + + # on both leader and non-leader ranks... + for rank in [0, 1]: + # take one out + log.info('removing mon %s' % mons[rank]) + manager.kill_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + log.info('causing some monitor log activity') + m = 30 + for n in range(1, m): + manager.raw_cluster_cmd('log', '%d of %d' % (n, m)) + + log.info('adding mon %s back in' % mons[rank]) + manager.revive_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons)) diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py new file mode 100644 index 000000000..30a7555b5 --- /dev/null +++ b/qa/tasks/mon_thrash.py @@ -0,0 +1,420 @@ +""" +Monitor thrash +""" +import logging +import contextlib +import random +import time +import gevent +import json +import math +from teuthology import misc as teuthology +from teuthology.contextutil import safe_while +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +def _get_mons(ctx): + """ + Get monitor names from the context value. + """ + mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)] + return mons + +class MonitorThrasher(Thrasher): + """ + How it works:: + + - pick a monitor + - kill it + - wait for quorum to be formed + - sleep for 'revive_delay' seconds + - revive monitor + - wait for quorum to be formed + - sleep for 'thrash_delay' seconds + + Options:: + + seed Seed to use on the RNG to reproduce a previous + behaviour (default: None; i.e., not set) + revive_delay Number of seconds to wait before reviving + the monitor (default: 10) + thrash_delay Number of seconds to wait in-between + test iterations (default: 0) + store_thrash Thrash monitor store before killing the monitor being thrashed (default: False) + store_thrash_probability Probability of thrashing a monitor's store + (default: 50) + thrash_many Thrash multiple monitors instead of just one. If + 'maintain_quorum' is set to False, then we will + thrash up to as many monitors as there are + available. (default: False) + maintain_quorum Always maintain quorum, taking care on how many + monitors we kill during the thrashing. 
If we + happen to only have one or two monitors configured, + if this option is set to True, then we won't run + this task as we cannot guarantee maintenance of + quorum. Setting it to false however would allow the + task to run with as many as just one single monitor. + (default: True) + freeze_mon_probability: how often to freeze the mon instead of killing it, + in % (default: 0) + freeze_mon_duration: how many seconds to freeze the mon (default: 15) + scrub Scrub after each iteration (default: True) + check_mds_failover Check if mds failover happened (default: False) + + Note: if 'store_thrash' is set to True, then 'maintain_quorum' must also + be set to True. + + For example:: + + tasks: + - ceph: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 + store_thrash: true + store_thrash_probability: 40 + seed: 31337 + maintain_quorum: true + thrash_many: true + check_mds_failover: True + - ceph-fuse: + - workunit: + clients: + all: + - mon/workloadgen.sh + """ + def __init__(self, ctx, manager, config, name, logger): + super(MonitorThrasher, self).__init__() + + self.ctx = ctx + self.manager = manager + self.manager.wait_for_clean() + + self.stopping = False + self.logger = logger + self.config = config + self.name = name + + if self.config is None: + self.config = dict() + + """ Test reproducibility """ + self.random_seed = self.config.get('seed', None) + + if self.random_seed is None: + self.random_seed = int(time.time()) + + self.rng = random.Random() + self.rng.seed(int(self.random_seed)) + + """ Monitor thrashing """ + self.revive_delay = float(self.config.get('revive_delay', 10.0)) + self.thrash_delay = float(self.config.get('thrash_delay', 0.0)) + + self.thrash_many = self.config.get('thrash_many', False) + self.maintain_quorum = self.config.get('maintain_quorum', True) + + self.scrub = self.config.get('scrub', True) + + self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10)) + self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0)) + + assert self.max_killable() > 0, \ + 'Unable to kill at least one monitor with the current config.' + + """ Store thrashing """ + self.store_thrash = self.config.get('store_thrash', False) + self.store_thrash_probability = int( + self.config.get('store_thrash_probability', 50)) + if self.store_thrash: + assert self.store_thrash_probability > 0, \ + 'store_thrash is set, probability must be > 0' + assert self.maintain_quorum, \ + 'store_thrash = true must imply maintain_quorum = true' + + #MDS failover + self.mds_failover = self.config.get('check_mds_failover', False) + + if self.mds_failover: + self.mds_cluster = MDSCluster(ctx) + + self.thread = gevent.spawn(self.do_thrash) + + def log(self, x): + """ + locally log info messages + """ + self.logger.info(x) + + def do_join(self): + """ + Break out of this processes thrashing loop. + """ + self.stopping = True + self.thread.get() + + def should_thrash_store(self): + """ + If allowed, indicate that we should thrash a certain percentage of + the time as determined by the store_thrash_probability value. + """ + if not self.store_thrash: + return False + return self.rng.randrange(0, 101) < self.store_thrash_probability + + def thrash_store(self, mon): + """ + Thrash the monitor specified. 
+ :param mon: monitor to thrash + """ + self.log('thrashing mon.{id} store'.format(id=mon)) + out = self.manager.raw_cluster_cmd( + 'tell', 'mon.%s' % mon, 'sync_force', + '--yes-i-really-mean-it') + j = json.loads(out) + assert j['ret'] == 0, \ + 'error forcing store sync on mon.{id}:\n{ret}'.format( + id=mon,ret=out) + + def should_freeze_mon(self): + """ + Indicate that we should freeze a certain percentago of the time + as determined by the freeze_mon_probability value. + """ + return self.rng.randrange(0, 101) < self.freeze_mon_probability + + def freeze_mon(self, mon): + """ + Send STOP signal to freeze the monitor. + """ + log.info('Sending STOP to mon %s', mon) + self.manager.signal_mon(mon, 19) # STOP + + def unfreeze_mon(self, mon): + """ + Send CONT signal to unfreeze the monitor. + """ + log.info('Sending CONT to mon %s', mon) + self.manager.signal_mon(mon, 18) # CONT + + def kill_mon(self, mon): + """ + Kill the monitor specified + """ + self.log('killing mon.{id}'.format(id=mon)) + self.manager.kill_mon(mon) + + def revive_mon(self, mon): + """ + Revive the monitor specified + """ + self.log('killing mon.{id}'.format(id=mon)) + self.log('reviving mon.{id}'.format(id=mon)) + self.manager.revive_mon(mon) + + def max_killable(self): + """ + Return the maximum number of monitors we can kill. + """ + m = len(_get_mons(self.ctx)) + if self.maintain_quorum: + return max(math.ceil(m/2.0)-1, 0) + else: + return m + + def _wait_until_quorum(self, mon, size, timeout=300): + """ + Wait until the monitor specified is in the quorum. + """ + self.log('waiting for quorum size %d for mon %s' % (size, mon)) + s = {} + + with safe_while(sleep=3, + tries=timeout // 3, + action=f'wait for quorum size {size} on mon {mon}') as proceed: + while proceed(): + s = self.manager.get_mon_status(mon) + if len(s['quorum']) == size: + break + self.log("quorum is size %d" % len(s['quorum'])) + + self.log("final quorum is size %d" % len(s['quorum'])) + return s + + def do_thrash(self): + """ + _do_thrash() wrapper. + """ + try: + self._do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. + + def _do_thrash(self): + """ + Continuously loop and thrash the monitors. 
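Each iteration kills between 1 and max_killable() monitors; with maintain_quorum that bound keeps a majority alive, for example:

    import math
    m = 5                                   # monitors in the cluster (example)
    max(math.ceil(m / 2.0) - 1, 0)          # -> 2; a quorum of 3 survives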
+ """ + #status before mon thrashing + if self.mds_failover: + oldstatus = self.mds_cluster.status() + + self.log('start thrashing') + self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\ + 'thrash many: {tm}, maintain quorum: {mq} '\ + 'store thrash: {st}, probability: {stp} '\ + 'freeze mon: prob {fp} duration {fd}'.format( + s=self.random_seed,r=self.revive_delay,t=self.thrash_delay, + tm=self.thrash_many, mq=self.maintain_quorum, + st=self.store_thrash,stp=self.store_thrash_probability, + fp=self.freeze_mon_probability,fd=self.freeze_mon_duration, + )) + + while not self.stopping: + mons = _get_mons(self.ctx) + self.manager.wait_for_mon_quorum_size(len(mons)) + self.log('making sure all monitors are in the quorum') + for m in mons: + try: + s = self._wait_until_quorum(m, len(mons), timeout=30) + except Exception as e: + self.log('mon.{m} is not in quorum size, exception: {e}'.format(m=m,e=e)) + self.log('mon_status: {s}'.format(s=s)) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + kill_up_to = self.rng.randrange(1, self.max_killable()+1) + mons_to_kill = self.rng.sample(mons, kill_up_to) + self.log('monitors to thrash: {m}'.format(m=mons_to_kill)) + + mons_to_freeze = [] + for mon in mons: + if mon in mons_to_kill: + continue + if self.should_freeze_mon(): + mons_to_freeze.append(mon) + self.log('monitors to freeze: {m}'.format(m=mons_to_freeze)) + + for mon in mons_to_kill: + self.log('thrashing mon.{m}'.format(m=mon)) + + """ we only thrash stores if we are maintaining quorum """ + if self.should_thrash_store() and self.maintain_quorum: + self.thrash_store(mon) + + self.kill_mon(mon) + + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + if self.maintain_quorum: + self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill)) + for m in mons: + if m in mons_to_kill: + continue + try: + s = self._wait_until_quorum(m, len(mons)-len(mons_to_kill), timeout=30) + except Exception as e: + self.log('mon.{m} is not in quorum size, exception: {e}'.format(m=m,e=e)) + self.log('mon_status: {s}'.format(s=s)) + + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons)-len(mons_to_kill) + + self.log('waiting for {delay} secs before reviving monitors'.format( + delay=self.revive_delay)) + time.sleep(self.revive_delay) + + for mon in mons_to_kill: + self.revive_mon(mon) + # do more freezes + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + self.manager.wait_for_mon_quorum_size(len(mons)) + for m in mons: + try: + s = self._wait_until_quorum(m, len(mons), timeout=30) + except Exception as e: + self.log('mon.{m} is not in quorum size, exception: {e}'.format(m=m,e=e)) + self.log('mon_status: {s}'.format(s=s)) + + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + if self.scrub: + self.log('triggering scrub') + try: + self.manager.raw_cluster_cmd('mon', 'scrub') + except Exception as e: + log.warning("Ignoring exception while triggering scrub: %s", e) + + if self.thrash_delay > 0.0: + self.log('waiting for {delay} secs before continuing 
thrashing'.format( + delay=self.thrash_delay)) + time.sleep(self.thrash_delay) + + #status after thrashing + if self.mds_failover: + status = self.mds_cluster.status() + assert not oldstatus.hadfailover(status), \ + 'MDS Failover' + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the monitor by thrashing them while another task/workunit + is running. + + Please refer to MonitorThrasher class for further information on the + available options. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mon_thrash task only accepts a dict for configuration' + assert len(_get_mons(ctx)) > 2, \ + 'mon_thrash task requires at least 3 monitors' + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + log.info('Beginning mon_thrash...') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + thrash_proc = MonitorThrasher(ctx, + manager, config, "MonitorThrasher", + logger=log.getChild('mon_thrasher')) + ctx.ceph[config['cluster']].thrashers.append(thrash_proc) + try: + log.debug('Yielding') + yield + finally: + log.info('joining mon_thrasher') + thrash_proc.do_join() + mons = _get_mons(ctx) + manager.wait_for_mon_quorum_size(len(mons)) diff --git a/qa/tasks/multibench.py b/qa/tasks/multibench.py new file mode 100644 index 000000000..c2a7299f1 --- /dev/null +++ b/qa/tasks/multibench.py @@ -0,0 +1,61 @@ +""" +Multibench testing +""" +import contextlib +import logging +import time +import copy +import gevent + +from tasks import radosbench + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run multibench + + The config should be as follows: + + multibench: + time: <seconds to run total> + segments: <number of concurrent benches> + radosbench: <config for radosbench> + + example: + + tasks: + - ceph: + - multibench: + clients: [client.0] + time: 360 + - interactive: + """ + log.info('Beginning multibench...') + assert isinstance(config, dict), \ + "please list clients to run on" + + def run_one(num): + """Run test spawn from gevent""" + start = time.time() + if not config.get('radosbench'): + benchcontext = {} + else: + benchcontext = copy.copy(config.get('radosbench')) + iterations = 0 + while time.time() - start < int(config.get('time', 600)): + log.info("Starting iteration %s of segment %s"%(iterations, num)) + benchcontext['pool'] = str(num) + "-" + str(iterations) + with radosbench.task(ctx, benchcontext): + time.sleep() + iterations += 1 + log.info("Starting %s threads"%(str(config.get('segments', 3)),)) + segments = [ + gevent.spawn(run_one, i) + for i in range(0, int(config.get('segments', 3)))] + + try: + yield + finally: + [i.get() for i in segments] diff --git a/qa/tasks/netem.py b/qa/tasks/netem.py new file mode 100644 index 000000000..1d9fd98f7 --- /dev/null +++ b/qa/tasks/netem.py @@ -0,0 +1,268 @@ +""" +Task to run tests with network delay between two remotes using tc and netem. +Reference:https://wiki.linuxfoundation.org/networking/netem. 
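For a sense of what the helpers below execute, a static delay of 50ms on interface eth0 toward one peer amounts to roughly (values illustrative, <peer-ip> is a placeholder):

    sudo tc qdisc add dev eth0 root handle 1: prio
    sudo tc qdisc add dev eth0 parent 1:1 handle 2: netem delay 50ms 5ms distribution normal
    sudo tc filter add dev eth0 parent 1:0 protocol ip pref 55 handle ::55 u32 match ip dst <peer-ip> flowid 2:1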
+ +""" + +import logging +import contextlib +from paramiko import SSHException +import socket +import time +import gevent +import argparse + +log = logging.getLogger(__name__) + + +def set_priority(interface): + + # create a priority queueing discipline + return ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'root', 'handle', '1:', 'prio'] + + +def show_tc(interface): + + # shows tc device present + return ['sudo', 'tc', 'qdisc', 'show', 'dev', interface] + + +def del_tc(interface): + + return ['sudo', 'tc', 'qdisc', 'del', 'dev', interface, 'root'] + + +def cmd_prefix(interface): + + # prepare command to set delay + cmd1 = ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'parent', + '1:1', 'handle', '2:', 'netem', 'delay'] + + # prepare command to change delay + cmd2 = ['sudo', 'tc', 'qdisc', 'replace', 'dev', interface, 'root', 'netem', 'delay'] + + # prepare command to apply filter to the matched ip/host + + cmd3 = ['sudo', 'tc', 'filter', 'add', 'dev', interface, + 'parent', '1:0', 'protocol', 'ip', 'pref', '55', + 'handle', '::55', 'u32', 'match', 'ip', 'dst'] + + return cmd1, cmd2, cmd3 + + +def static_delay(remote, host, interface, delay): + + """ Sets a constant delay between two hosts to emulate network delays using tc qdisc and netem""" + + set_delay, change_delay, set_ip = cmd_prefix(interface) + + ip = socket.gethostbyname(host.hostname) + + tc = remote.sh(show_tc(interface)) + if tc.strip().find('refcnt') == -1: + # call set_priority() func to create priority queue + # if not already created(indicated by -1) + log.info('Create priority queue') + remote.run(args=set_priority(interface)) + + # set static delay, with +/- 5ms jitter with normal distribution as default + log.info('Setting delay to %s' % delay) + set_delay.extend(['%s' % delay, '5ms', 'distribution', 'normal']) + remote.run(args=set_delay) + + # set delay to a particular remote node via ip + log.info('Delay set on %s' % remote) + set_ip.extend(['%s' % ip, 'flowid', '2:1']) + remote.run(args=set_ip) + else: + # if the device is already created, only change the delay + log.info('Setting delay to %s' % delay) + change_delay.extend(['%s' % delay, '5ms', 'distribution', 'normal']) + remote.run(args=change_delay) + + +def variable_delay(remote, host, interface, delay_range=[]): + + """ Vary delay between two values""" + + set_delay, change_delay, set_ip = cmd_prefix(interface) + + ip = socket.gethostbyname(host.hostname) + + # delay1 has to be lower than delay2 + delay1 = delay_range[0] + delay2 = delay_range[1] + + tc = remote.sh(show_tc(interface)) + if tc.strip().find('refcnt') == -1: + # call set_priority() func to create priority queue + # if not already created(indicated by -1) + remote.run(args=set_priority(interface)) + + # set variable delay + log.info('Setting varying delay') + set_delay.extend(['%s' % delay1, '%s' % delay2]) + remote.run(args=set_delay) + + # set delay to a particular remote node via ip + log.info('Delay set on %s' % remote) + set_ip.extend(['%s' % ip, 'flowid', '2:1']) + remote.run(args=set_ip) + else: + # if the device is already created, only change the delay + log.info('Setting varying delay') + change_delay.extend(['%s' % delay1, '%s' % delay2]) + remote.run(args=change_delay) + + +def delete_dev(remote, interface): + + """ Delete the qdisc if present""" + + log.info('Delete tc') + tc = remote.sh(show_tc(interface)) + if tc.strip().find('refcnt') != -1: + remote.run(args=del_tc(interface)) + + +class Toggle: + + stop_event = gevent.event.Event() + + def __init__(self, ctx, remote, host, 
interface, interval): + self.ctx = ctx + self.remote = remote + self.host = host + self.interval = interval + self.interface = interface + self.ip = socket.gethostbyname(self.host.hostname) + + def packet_drop(self): + + """ Drop packets to the remote ip specified""" + + _, _, set_ip = cmd_prefix(self.interface) + + tc = self.remote.sh(show_tc(self.interface)) + if tc.strip().find('refcnt') == -1: + self.remote.run(args=set_priority(self.interface)) + # packet drop to specific ip + log.info('Drop all packets to %s' % self.host) + set_ip.extend(['%s' % self.ip, 'action', 'drop']) + self.remote.run(args=set_ip) + + def link_toggle(self): + + """ + For toggling packet drop and recovery in regular interval. + If interval is 5s, link is up for 5s and link is down for 5s + """ + + while not self.stop_event.is_set(): + self.stop_event.wait(timeout=self.interval) + # simulate link down + try: + self.packet_drop() + log.info('link down') + except SSHException: + log.debug('Failed to run command') + + self.stop_event.wait(timeout=self.interval) + # if qdisc exist,delete it. + try: + delete_dev(self.remote, self.interface) + log.info('link up') + except SSHException: + log.debug('Failed to run command') + + def begin(self, gname): + self.thread = gevent.spawn(self.link_toggle) + self.ctx.netem.names[gname] = self.thread + + def end(self, gname): + self.stop_event.set() + log.info('gname is {}'.format(self.ctx.netem.names[gname])) + self.ctx.netem.names[gname].get() + + def cleanup(self): + """ + Invoked during unwinding if the test fails or exits before executing task 'link_recover' + """ + log.info('Clean up') + self.stop_event.set() + self.thread.get() + + +@contextlib.contextmanager +def task(ctx, config): + + """ + - netem: + clients: [c1.rgw.0] + iface: eno1 + dst_client: [c2.rgw.1] + delay: 10ms + + - netem: + clients: [c1.rgw.0] + iface: eno1 + dst_client: [c2.rgw.1] + delay_range: [10ms, 20ms] # (min, max) + + - netem: + clients: [rgw.1, mon.0] + iface: eno1 + gname: t1 + dst_client: [c2.rgw.1] + link_toggle_interval: 10 # no unit mentioned. By default takes seconds. 
+ + - netem: + clients: [rgw.1, mon.0] + iface: eno1 + link_recover: [t1, t2] + + + """ + + log.info('config %s' % config) + + assert isinstance(config, dict), \ + "please list clients to run on" + if not hasattr(ctx, 'netem'): + ctx.netem = argparse.Namespace() + ctx.netem.names = {} + + if config.get('dst_client') is not None: + dst = config.get('dst_client') + (host,) = ctx.cluster.only(dst).remotes.keys() + + for role in config.get('clients', None): + (remote,) = ctx.cluster.only(role).remotes.keys() + ctx.netem.remote = remote + if config.get('delay', False): + static_delay(remote, host, config.get('iface'), config.get('delay')) + if config.get('delay_range', False): + variable_delay(remote, host, config.get('iface'), config.get('delay_range')) + if config.get('link_toggle_interval', False): + log.info('Toggling link for %s' % config.get('link_toggle_interval')) + global toggle + toggle = Toggle(ctx, remote, host, config.get('iface'), config.get('link_toggle_interval')) + toggle.begin(config.get('gname')) + if config.get('link_recover', False): + log.info('Recovering link') + for gname in config.get('link_recover'): + toggle.end(gname) + log.info('sleeping') + time.sleep(config.get('link_toggle_interval')) + delete_dev(ctx.netem.remote, config.get('iface')) + del ctx.netem.names[gname] + + try: + yield + finally: + if ctx.netem.names: + toggle.cleanup() + for role in config.get('clients'): + (remote,) = ctx.cluster.only(role).remotes.keys() + delete_dev(remote, config.get('iface')) + diff --git a/qa/tasks/netsplit.py b/qa/tasks/netsplit.py new file mode 100644 index 000000000..0a9484a89 --- /dev/null +++ b/qa/tasks/netsplit.py @@ -0,0 +1,73 @@ +""" +Functions to netsplit test machines. + +At present, you must specify monitors to disconnect, and it +drops those IP pairs. This means OSDs etc on the hosts which use +the same IP will also be blocked! If you are using multiple IPs on the +same host within the cluster, daemons on those other IPs will get +through. 
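+
+Disconnection works by adding a symmetric pair of iptables rules: each of
+the two hosts gets an INPUT rule that DROPs TCP traffic coming from the
+other monitor's IP, and reconnect() deletes the same rules again with
+'iptables -D'.
+
+A hypothetical call from another task could look like this (illustrative
+only; 'mon.a' and 'mon.b' are assumed role names):
+
+    from tasks import netsplit
+
+    netsplit.disconnect(ctx, ['mon.a', 'mon.b'])
+    # ... exercise the cluster while the two monitors cannot talk ...
+    netsplit.reconnect(ctx, ['mon.a', 'mon.b'])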
+""" +import logging +import re + +log = logging.getLogger(__name__) + +def get_ip_and_ports(ctx, daemon): + assert daemon.startswith('mon.') + addr = ctx.ceph['ceph'].mons['{a}'.format(a=daemon)] + ips = re.findall("[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+[:[0-9]*]*", addr) + assert len(ips) > 0 + plain_ip = re.match("[0-9\.]*", ips[0]).group() + assert plain_ip is not None + port_list = [] + for ip in ips: + ip_str, port_str = re.match("([0-9\.]*)([:[0-9]*]*)", ip).groups() + assert ip_str == plain_ip + if len(port_str) > 0: + port_list.append(port_str) + return (plain_ip, port_list) + +def disconnect(ctx, config): + assert len(config) == 2 # we can only disconnect pairs right now + # and we can only disconnect mons right now + assert config[0].startswith('mon.') + assert config[1].startswith('mon.') + (ip1, _) = get_ip_and_ports(ctx, config[0]) + (ip2, _) = get_ip_and_ports(ctx, config[1]) + + (host1,) = ctx.cluster.only(config[0]).remotes.keys() + (host2,) = ctx.cluster.only(config[1]).remotes.keys() + assert host1 is not None + assert host2 is not None + + host1.run( + args = ["sudo", "iptables", "-A", "INPUT", "-p", "tcp", "-s", + ip2, "-j", "DROP"] + ) + host2.run( + args = ["sudo", "iptables", "-A", "INPUT", "-p", "tcp", "-s", + ip1, "-j", "DROP"] + ) + +def reconnect(ctx, config): + assert len(config) == 2 # we can only disconnect pairs right now + # and we can only disconnect mons right now + assert config[0].startswith('mon.') + assert config[1].startswith('mon.') + + (ip1, _) = get_ip_and_ports(ctx, config[0]) + (ip2, _) = get_ip_and_ports(ctx, config[1]) + + (host1,) = ctx.cluster.only(config[0]).remotes.keys() + (host2,) = ctx.cluster.only(config[1]).remotes.keys() + assert host1 is not None + assert host2 is not None + + host1.run( + args = ["sudo", "iptables", "-D", "INPUT", "-p", "tcp", "-s", + ip2, "-j", "DROP"] + ) + host2.run( + args = ["sudo", "iptables", "-D", "INPUT", "-p", "tcp", "-s", + ip1, "-j", "DROP"] + ) diff --git a/qa/tasks/notification_tests.py b/qa/tasks/notification_tests.py new file mode 100644 index 000000000..7a3a401ab --- /dev/null +++ b/qa/tasks/notification_tests.py @@ -0,0 +1,320 @@ +""" +Run a set of bucket notification tests on rgw. 
+""" +from io import BytesIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + assert isinstance(config, dict) + log.info('Downloading bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + branch = ctx.config.get('suite_branch') + repo = ctx.config.get('suite_repo') + log.info('Using branch %s from %s for bucket notifications tests', branch, repo) + for (client, client_config) in config.items(): + ctx.cluster.only(client).run( + args=['git', 'clone', '-b', branch, repo, '{tdir}/ceph'.format(tdir=testdir)], + ) + + sha1 = client_config.get('sha1') + + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/ceph'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + + try: + yield + finally: + log.info('Removing bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/ceph'.format(tdir=testdir), + ], + ) + +def _config_user(bntests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + bntests_conf[section].setdefault('user_id', user) + bntests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + bntests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + bntests_conf[section].setdefault('access_key', + ''.join(random.choice(string.ascii_uppercase) for i in range(20))) + bntests_conf[section].setdefault('secret_key', + base64.b64encode(os.urandom(40)).decode()) + + +@contextlib.contextmanager +def pre_process(ctx, config): + """ + This function creates a directory which is required to run some AMQP tests. + """ + assert isinstance(config, dict) + log.info('Pre-processing...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + + ctx.cluster.only(client).run( + args=[ + 'mkdir', '-p', '/home/ubuntu/.aws/models/s3/2006-03-01/', + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '/home/ubuntu/.aws/models/s3/2006-03-01/', run.Raw('&&'), 'cp', '{tdir}/ceph/examples/rgw/boto3/service-2.sdk-extras.json'.format(tdir=test_dir), 'service-2.sdk-extras.json' + ], + ) + + try: + yield + finally: + log.info('Pre-processing completed...') + test_dir = teuthology.get_testdir(ctx) + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + 'rm', '-rf', '/home/ubuntu/.aws/models/s3/2006-03-01/service-2.sdk-extras.json', + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '/home/ubuntu/', run.Raw('&&'), 'rmdir', '-p', '.aws/models/s3/2006-03-01/', + ], + ) + + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + log.info('Creating rgw user...') + testdir = teuthology.get_testdir(ctx) + + users = {'s3 main': 'foo'} + for client in config['clients']: + bntests_conf = config['bntests_conf'][client] + for section, user in users.items(): + _config_user(bntests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=bntests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', bntests_conf[section]['user_id'], + '--display-name', bntests_conf[section]['display_name'], + '--access-key', bntests_conf[section]['access_key'], + '--secret', bntests_conf[section]['secret_key'], + '--cluster', cluster_name, + ], + ) + + try: + yield + finally: + for client in config['clients']: + for user in users.values(): + uid = '{user}.{client}'.format(user=user, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'rm', + '--uid', uid, + '--purge-data', + '--cluster', cluster_name, + ], + ) + +@contextlib.contextmanager +def configure(ctx, config): + assert isinstance(config, dict) + log.info('Configuring bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + bntests_conf = config['bntests_conf'][client] + + conf_fp = BytesIO() + bntests_conf.write(conf_fp) + remote.write_file( + path='{tdir}/ceph/src/test/rgw/bucket_notification/bn-tests.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + + remote.run( + args=[ + 'cd', + '{tdir}/ceph/src/test/rgw/bucket_notification'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ], + ) + + try: + yield + finally: + log.info('Removing bn-tests.conf file...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=['rm', '-f', + '{tdir}/ceph/src/test/rgw/bucket_notification/bn-tests.{client}.conf'.format(tdir=testdir,client=client), + ], + ) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run the bucket notifications tests after everything is set up. 
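+
+    By default the attribute filter below excludes the endpoint-specific
+    and manual tests (kafka_test, amqp_test, amqp_ssl_test, kafka_ssl_test,
+    modification_required, manual_test); supplying 'extra_attr' in the
+    client config replaces that filter.
+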
+ :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + log.info('Running bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + attr = ["!kafka_test", "!amqp_test", "!amqp_ssl_test", "!kafka_ssl_test", "!modification_required", "!manual_test"] + + if 'extra_attr' in client_config: + attr = client_config.get('extra_attr') + + args = [ + 'BNTESTS_CONF={tdir}/ceph/src/test/rgw/bucket_notification/bn-tests.{client}.conf'.format(tdir=testdir, client=client), + '{tdir}/ceph/src/test/rgw/bucket_notification/virtualenv/bin/python'.format(tdir=testdir), + '-m', 'nose', + '-s', + '{tdir}/ceph/src/test/rgw/bucket_notification/test_bn.py'.format(tdir=testdir), + '-v', + '-a', ','.join(attr), + ] + + remote.run( + args=args, + label="bucket notification tests against different endpoints" + ) + yield + +@contextlib.contextmanager +def task(ctx,config): + """ + To run bucket notification tests under Kafka endpoint the prerequisite is to run the kafka server. Also you need to pass the + 'extra_attr' to the notification tests. Following is the way how to run kafka and finally bucket notification tests:: + + tasks: + - kafka: + client.0: + kafka_version: 2.6.0 + - notification_tests: + client.0: + extra_attr: ["kafka_test"] + + To run bucket notification tests under AMQP endpoint the prerequisite is to run the rabbitmq server. Also you need to pass the + 'extra_attr' to the notification tests. Following is the way how to run rabbitmq and finally bucket notification tests:: + + tasks: + - rabbitmq: + client.0: + - notification_tests: + client.0: + extra_attr: ["amqp_test"] + + If you want to run the tests against your changes pushed to your remote repo you can provide 'suite_branch' and 'suite_repo' + parameters in your teuthology-suite command. 
Example command for this is as follows:: + + teuthology-suite --ceph-repo https://github.com/ceph/ceph-ci.git -s rgw:notifications --ceph your_ceph_branch_name -m smithi --suite-repo https://github.com/your_name/ceph.git --suite-branch your_branch_name + + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task kafka only supports a list or dictionary for configuration" + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients=config.keys() + + log.debug('Notifications config is %s', config) + + bntests_conf = {} + + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'bntests: no rgw endpoint for {}'.format(client) + + bntests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port':endpoint.port, + 'host':endpoint.dns_name, + }, + 's3 main':{} + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: pre_process(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + bntests_conf=bntests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + bntests_conf=bntests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + pass + yield diff --git a/qa/tasks/nvme_loop.py b/qa/tasks/nvme_loop.py new file mode 100644 index 000000000..c9d8f0dc7 --- /dev/null +++ b/qa/tasks/nvme_loop.py @@ -0,0 +1,106 @@ +import contextlib +import logging + +from io import StringIO +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + log.info('Setting up nvme_loop on scratch devices...') + host = 'hostnqn' + port = '1' + devs_by_remote = {} + old_scratch_by_remote = {} + for remote, roles in ctx.cluster.remotes.items(): + if remote.is_container: + continue + devs = teuthology.get_scratch_devices(remote) + devs_by_remote[remote] = devs + base = '/sys/kernel/config/nvmet' + remote.run( + args=[ + 'grep', '^nvme_loop', '/proc/modules', run.Raw('||'), + 'sudo', 'modprobe', 'nvme_loop', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', f'{base}/hosts/{host}', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', f'{base}/ports/{port}', + run.Raw('&&'), + 'echo', 'loop', run.Raw('|'), + 'sudo', 'tee', f'{base}/ports/{port}/addr_trtype', + ] + ) + for dev in devs: + short = dev.split('/')[-1] + log.info(f'Connecting nvme_loop {remote.shortname}:{dev}...') + remote.run( + args=[ + 'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}', + run.Raw('&&'), + 'echo', '1', run.Raw('|'), + 'sudo', 'tee', f'{base}/subsystems/{short}/attr_allow_any_host', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}/namespaces/1', + run.Raw('&&'), + 'echo', '-n', dev, run.Raw('|'), + 'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/device_path', + run.Raw('&&'), + 'echo', '1', run.Raw('|'), + 'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/enable', + run.Raw('&&'), + 'sudo', 'ln', '-s', f'{base}/subsystems/{short}', + f'{base}/ports/{port}/subsystems/{short}', + run.Raw('&&'), + 'sudo', 'nvme', 'connect', '-t', 'loop', '-n', short, '-q', host, + ] + ) + + # identify nvme_loops devices + old_scratch_by_remote[remote] = remote.read_file('/scratch_devs') + + with contextutil.safe_while(sleep=1, tries=15) as 
proceed: + while proceed(): + p = remote.run(args=['sudo', 'nvme', 'list'], stdout=StringIO()) + new_devs = [] + for line in p.stdout.getvalue().splitlines(): + dev, _, vendor = line.split()[0:3] + if dev.startswith('/dev/') and vendor == 'Linux': + new_devs.append(dev) + log.info(f'new_devs {new_devs}') + assert len(new_devs) <= len(devs) + if len(new_devs) == len(devs): + break + + remote.write_file( + path='/scratch_devs', + data='\n'.join(new_devs) + '\n', + sudo=True + ) + + try: + yield + + finally: + for remote, devs in devs_by_remote.items(): + if remote.is_container: + continue + for dev in devs: + short = dev.split('/')[-1] + log.info(f'Disconnecting nvme_loop {remote.shortname}:{dev}...') + remote.run( + args=[ + 'sudo', 'nvme', 'disconnect', '-n', short + ], + check_status=False, + ) + remote.write_file( + path='/scratch_devs', + data=old_scratch_by_remote[remote], + sudo=True + ) diff --git a/qa/tasks/object_source_down.py b/qa/tasks/object_source_down.py new file mode 100644 index 000000000..e4519bb6f --- /dev/null +++ b/qa/tasks/object_source_down.py @@ -0,0 +1,101 @@ +""" +Test Object locations going down +""" +import logging +import time +from teuthology import misc as teuthology +from tasks import ceph_manager +from tasks.util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of object location going down + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + + # take 0, 1 out + manager.mark_out_osd(0) + manager.mark_out_osd(1) + manager.wait_for_clean() + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.0', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.2', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.3', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile]) + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) + + manager.mark_out_osd(3) + manager.wait_till_active() + + manager.mark_in_osd(0) + manager.wait_till_active() + + manager.flush_pg_stats([2, 0]) + + manager.mark_out_osd(2) + manager.wait_till_active() + + # bring up 1 + manager.mark_in_osd(1) + manager.wait_till_active() + + manager.flush_pg_stats([0, 1]) + log.info("Getting unfound objects") + unfound = manager.get_num_unfound_objects() + assert not unfound + + manager.kill_osd(2) + manager.mark_down_osd(2) + 
manager.kill_osd(3) + manager.mark_down_osd(3) + + manager.flush_pg_stats([0, 1]) + log.info("Getting unfound objects") + unfound = manager.get_num_unfound_objects() + assert unfound diff --git a/qa/tasks/omapbench.py b/qa/tasks/omapbench.py new file mode 100644 index 000000000..a5bd3a4df --- /dev/null +++ b/qa/tasks/omapbench.py @@ -0,0 +1,83 @@ +""" +Run omapbench executable within teuthology +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run omapbench + + The config should be as follows:: + + omapbench: + clients: [client list] + threads: <threads at once> + objects: <number of objects to write> + entries: <number of entries per object map> + keysize: <number of characters per object map key> + valsize: <number of characters per object map val> + increment: <interval to show in histogram (in ms)> + omaptype: <how the omaps should be generated> + + example:: + + tasks: + - ceph: + - omapbench: + clients: [client.0] + threads: 30 + objects: 1000 + entries: 10 + keysize: 10 + valsize: 100 + increment: 100 + omaptype: uniform + - interactive: + """ + log.info('Beginning omapbench...') + assert isinstance(config, dict), \ + "please list clients to run on" + omapbench = {} + testdir = teuthology.get_testdir(ctx) + print(str(config.get('increment',-1))) + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'omapbench', + '--name', role[len(PREFIX):], + '-t', str(config.get('threads', 30)), + '-o', str(config.get('objects', 1000)), + '--entries', str(config.get('entries',10)), + '--keysize', str(config.get('keysize',10)), + '--valsize', str(config.get('valsize',1000)), + '--inc', str(config.get('increment',10)), + '--omaptype', str(config.get('omaptype','uniform')) + ]).format(tdir=testdir), + ], + logger=log.getChild('omapbench.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + omapbench[id_] = proc + + try: + yield + finally: + log.info('joining omapbench') + run.wait(omapbench.values()) diff --git a/qa/tasks/openssl_keys.py b/qa/tasks/openssl_keys.py new file mode 100644 index 000000000..2d26a8729 --- /dev/null +++ b/qa/tasks/openssl_keys.py @@ -0,0 +1,245 @@ +""" +Generates and installs a signed SSL certificate. +""" +import argparse +import logging +import os + +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.orchestra import run +from teuthology.task import Task + +log = logging.getLogger(__name__) + +class OpenSSLKeys(Task): + name = 'openssl_keys' + """ + Generates and installs a signed SSL certificate. + + To create a self-signed certificate: + + - openssl_keys: + # certificate name + root: # results in root.key and root.crt + + # [required] make the private key and certificate available in this client's test directory + client: client.0 + + # common name, defaults to `hostname`. 
chained certificates must not share a common name + cn: teuthology + + # private key type for -newkey, defaults to rsa:2048 + key-type: rsa:4096 + + # install the certificate as trusted on these clients: + install: [client.0, client.1] + + + To create a certificate signed by a ca certificate: + + - openssl_keys: + root: (self-signed certificate as above) + ... + + cert-for-client1: + client: client.1 + + # use another ssl certificate (by 'name') as the certificate authority + ca: root # --CAkey=root.key -CA=root.crt + + # embed the private key in the certificate file + embed-key: true + """ + + def __init__(self, ctx, config): + super(OpenSSLKeys, self).__init__(ctx, config) + self.certs = [] + self.installed = [] + + def setup(self): + # global dictionary allows other tasks to look up certificate paths + if not hasattr(self.ctx, 'ssl_certificates'): + self.ctx.ssl_certificates = {} + + # use testdir/ca as a working directory + self.cadir = '/'.join((misc.get_testdir(self.ctx), 'ca')) + # make sure self-signed certs get added first, they don't have 'ca' field + configs = sorted(self.config.items(), key=lambda x: 'ca' in x[1]) + for name, config in configs: + # names must be unique to avoid clobbering each others files + if name in self.ctx.ssl_certificates: + raise ConfigError('ssl: duplicate certificate name {}'.format(name)) + + # create the key and certificate + cert = self.create_cert(name, config) + + self.ctx.ssl_certificates[name] = cert + self.certs.append(cert) + + # install as trusted on the requested clients + for client in config.get('install', []): + installed = self.install_cert(cert, client) + self.installed.append(installed) + + def teardown(self): + """ + Clean up any created/installed certificate files. + """ + for cert in self.certs: + self.remove_cert(cert) + + for installed in self.installed: + self.uninstall_cert(installed) + + def create_cert(self, name, config): + """ + Create a certificate with the given configuration. 
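+
+        If the config names a 'ca', the private key and CSR are generated
+        on that CA certificate's client (optionally with a subjectAltName
+        extension file), signed with the CA's key, verified, and copied to
+        the target client.  Otherwise a self-signed certificate is produced
+        with 'openssl req -x509'.  With 'embed-key: true' the private key
+        is appended to the resulting .crt file.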
+ """ + cert = argparse.Namespace() + cert.name = name + cert.key_type = config.get('key-type', 'rsa:2048') + + cert.client = config.get('client', None) + if not cert.client: + raise ConfigError('ssl: missing required field "client"') + + (cert.remote,) = self.ctx.cluster.only(cert.client).remotes.keys() + + cert.remote.run(args=['mkdir', '-p', self.cadir]) + + cert.key = f'{self.cadir}/{cert.name}.key' + cert.certificate = f'{self.cadir}/{cert.name}.crt' + + san_ext = [] + add_san_default = False + cn = config.get('cn', '') + if cn == '': + cn = cert.remote.hostname + add_san_default = True + if config.get('add-san', add_san_default): + ext = f'{self.cadir}/{cert.name}.ext' + san_ext = ['-extfile', ext] + + # provide the common name in -subj to avoid the openssl command prompts + subject = f'/CN={cn}' + + # if a ca certificate is provided, use it to sign the new certificate + ca = config.get('ca', None) + if ca: + # the ca certificate must have been created by a prior ssl task + ca_cert = self.ctx.ssl_certificates.get(ca, None) + if not ca_cert: + raise ConfigError(f'ssl: ca {ca} not found for certificate {cert.name}') + + csr = f'{self.cadir}/{cert.name}.csr' + srl = f'{self.cadir}/{ca_cert.name}.srl' + remove_files = ['rm', '-f', csr, srl] + + # these commands are run on the ca certificate's client because + # they need access to its private key and cert + + # generate a private key and signing request + ca_cert.remote.run(args=['openssl', 'req', '-nodes', + '-newkey', cert.key_type, '-keyout', cert.key, + '-out', csr, '-subj', subject]) + + if san_ext: + remove_files.append(ext) + ca_cert.remote.write_file(path=ext, + data='subjectAltName = DNS:{},IP:{}'.format( + cn, + config.get('ip', cert.remote.ip_address))) + + # create the signed certificate + ca_cert.remote.run(args=['openssl', 'x509', '-req', '-in', csr, + '-CA', ca_cert.certificate, '-CAkey', ca_cert.key, '-CAcreateserial', + '-out', cert.certificate, '-days', '365', '-sha256'] + san_ext) + + ca_cert.remote.run(args=remove_files) # clean up the signing request and serial + + # verify the new certificate against its ca cert + ca_cert.remote.run(args=['openssl', 'verify', + '-CAfile', ca_cert.certificate, cert.certificate]) + + if cert.remote != ca_cert.remote: + # copy to remote client + self.remote_copy_file(ca_cert.remote, cert.certificate, cert.remote, cert.certificate) + self.remote_copy_file(ca_cert.remote, cert.key, cert.remote, cert.key) + # clean up the local copies + ca_cert.remote.run(args=['rm', cert.certificate, cert.key]) + # verify the remote certificate (requires ca to be in its trusted ca certificate store) + cert.remote.run(args=['openssl', 'verify', cert.certificate]) + else: + # otherwise, generate a private key and use it to self-sign a new certificate + cert.remote.run(args=['openssl', 'req', '-x509', '-nodes', + '-newkey', cert.key_type, '-keyout', cert.key, + '-days', '365', '-out', cert.certificate, '-subj', subject]) + + if config.get('embed-key', False): + # append the private key to the certificate file + cert.remote.run(args=['cat', cert.key, run.Raw('>>'), cert.certificate]) + + return cert + + def remove_cert(self, cert): + """ + Delete all of the files associated with the given certificate. 
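+
+        Only the .key and .crt files are removed here; the CSR and serial
+        files were already cleaned up during signing.  The shared ca
+        directory is removed once no other certificates remain in it.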
+ """ + # remove the private key and certificate + cert.remote.run(args=['rm', '-f', cert.certificate, cert.key]) + + # remove ca subdirectory if it's empty + cert.remote.run(args=['rmdir', '--ignore-fail-on-non-empty', self.cadir]) + + def install_cert(self, cert, client): + """ + Install as a trusted ca certificate on the given client. + """ + (remote,) = self.ctx.cluster.only(client).remotes.keys() + + installed = argparse.Namespace() + installed.remote = remote + + if remote.os.package_type == 'deb': + installed.path = '/usr/local/share/ca-certificates/{}.crt'.format(cert.name) + installed.command = ['sudo', 'update-ca-certificates'] + else: + installed.path = '/usr/share/pki/ca-trust-source/anchors/{}.crt'.format(cert.name) + installed.command = ['sudo', 'update-ca-trust'] + + cp_or_mv = 'cp' + if remote != cert.remote: + # copy into remote cadir (with mkdir if necessary) + remote.run(args=['mkdir', '-p', self.cadir]) + self.remote_copy_file(cert.remote, cert.certificate, remote, cert.certificate) + cp_or_mv = 'mv' # move this remote copy into the certificate store + + # install into certificate store as root + remote.run(args=['sudo', cp_or_mv, cert.certificate, installed.path]) + remote.run(args=installed.command) + + return installed + + def uninstall_cert(self, installed): + """ + Uninstall a certificate from the trusted certificate store. + """ + installed.remote.run(args=['sudo', 'rm', installed.path]) + installed.remote.run(args=installed.command) + + def remote_copy_file(self, from_remote, from_path, to_remote, to_path): + """ + Copies a file from one remote to another. + + The remotes don't have public-key auth for 'scp' or misc.copy_file(), + so this copies through an intermediate local tmp file. + """ + log.info('copying from {}:{} to {}:{}...'.format(from_remote, from_path, to_remote, to_path)) + local_path = from_remote.get_file(from_path) + try: + to_remote.put_file(local_path, to_path) + finally: + os.remove(local_path) + +task = OpenSSLKeys diff --git a/qa/tasks/osd_backfill.py b/qa/tasks/osd_backfill.py new file mode 100644 index 000000000..b33e1c912 --- /dev/null +++ b/qa/tasks/osd_backfill.py @@ -0,0 +1,104 @@ +""" +Osd backfill test +""" +import logging +import time +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def rados_start(ctx, remote, cmd): + """ + Run a remote rados command (currently used to only write data) + """ + log.info("rados %s" % ' '.join(cmd)) + testdir = teuthology.get_testdir(ctx) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=False, + ) + return proc + +def task(ctx, config): + """ + Test backfill + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'thrashosds task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 3 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + # write some data + p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096', + '--no-cleanup']) + err = p.wait() + log.info('err is %d' % 
err) + + # mark osd.0 out to trigger a rebalance/backfill + manager.mark_out_osd(0) + + # also mark it down to it won't be included in pg_temps + manager.kill_osd(0) + manager.mark_down_osd(0) + + # wait for everything to peer and be happy... + manager.flush_pg_stats([1, 2]) + manager.wait_for_recovery() + + # write some new data + p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096', + '--no-cleanup']) + + time.sleep(15) + + # blackhole + restart osd.1 + # this triggers a divergent backfill target + manager.blackhole_kill_osd(1) + time.sleep(2) + manager.revive_osd(1) + + # wait for our writes to complete + succeed + err = p.wait() + log.info('err is %d' % err) + + # wait for osd.1 and osd.2 to be up + manager.wait_till_osd_is_up(1) + manager.wait_till_osd_is_up(2) + + # cluster must recover + manager.flush_pg_stats([1, 2]) + manager.wait_for_recovery() + + # re-add osd.0 + manager.revive_osd(0) + manager.flush_pg_stats([1, 2]) + manager.wait_for_clean() + + diff --git a/qa/tasks/osd_failsafe_enospc.py b/qa/tasks/osd_failsafe_enospc.py new file mode 100644 index 000000000..fe2996a78 --- /dev/null +++ b/qa/tasks/osd_failsafe_enospc.py @@ -0,0 +1,218 @@ +""" +Handle osdfailsafe configuration settings (nearfull ratio and full ratio) +""" +from io import StringIO +import logging +import time + +from teuthology.orchestra import run +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio + configuration settings + + In order for test to pass must use log-ignorelist as follows + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: ['OSD near full', 'OSD full dropping all updates'] + - osd_failsafe_enospc: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'osd_failsafe_enospc task only accepts a dict for configuration' + + # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding + sleep_time = 50 + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + + manager = ctx.managers['ceph'] + + # create 1 pg pool with 1 rep which can only be on osd.0 + osds = manager.get_osd_dump() + for osd in osds: + if osd['osd'] != 0: + manager.mark_out_osd(osd['osd']) + + log.info('creating pool foo') + manager.create_pool("foo") + manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1') + + # State NONE -> NEAR + log.info('1. Verify warning messages when exceeding nearfull_ratio') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + # State NEAR -> FULL + log.info('2. 
Verify error messages when exceeding full_ratio') + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count + + log.info('3. Verify write failure when exceeding full_ratio') + + # Write data should fail + ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile]) + assert ret != 0, 'Expected write failure but it succeeded with exit status 0' + + # Put back default + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') + time.sleep(10) + + # State FULL -> NEAR + log.info('4. Verify write success when NOT exceeding full_ratio') + + # Write should succeed + ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2]) + assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret + + log.info('5. Verify warning messages again when exceeding nearfull_ratio') + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90') + time.sleep(10) + + # State NONE -> FULL + log.info('6. Verify error messages again when exceeding full_ratio') + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count + + # State FULL -> NONE + log.info('7. 
Verify no messages settings back to default') + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') + time.sleep(10) + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + log.info('Test Passed') + + # Bring all OSDs back in + manager.remove_pool("foo") + for osd in osds: + if osd['osd'] != 0: + manager.mark_in_osd(osd['osd']) diff --git a/qa/tasks/osd_max_pg_per_osd.py b/qa/tasks/osd_max_pg_per_osd.py new file mode 100644 index 000000000..6680fe6e3 --- /dev/null +++ b/qa/tasks/osd_max_pg_per_osd.py @@ -0,0 +1,126 @@ +import logging +import random + + +log = logging.getLogger(__name__) + + +def pg_num_in_all_states(pgs, *states): + return sum(1 for state in pgs.values() + if all(s in state for s in states)) + + +def pg_num_in_any_state(pgs, *states): + return sum(1 for state in pgs.values() + if any(s in state for s in states)) + + +def test_create_from_mon(ctx, config): + """ + osd should stop creating new pools if the number of pg it servers + exceeds the max-pg-per-osd setting, and it should resume the previously + suspended pg creations once the its pg number drops down below the setting + How it works:: + 1. set the hard limit of pg-per-osd to "2" + 2. create pool.a with pg_num=2 + # all pgs should be active+clean + 2. create pool.b with pg_num=2 + # new pgs belonging to this pool should be unknown (the primary osd + reaches the limit) or creating (replica osd reaches the limit) + 3. remove pool.a + 4. all pg belonging to pool.b should be active+clean + """ + pg_num = config.get('pg_num', 2) + manager = ctx.managers['ceph'] + log.info('1. creating pool.a') + pool_a = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + + log.info('2. creating pool.b') + pool_b = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating') + assert pg_pending == pg_num + + log.info('3. removing pool.a') + manager.remove_pool(pool_a) + pg_states = manager.wait_till_pg_convergence(300) + assert len(pg_states) == pg_num + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + + # cleanup + manager.remove_pool(pool_b) + + +def test_create_from_peer(ctx, config): + """ + osd should stop creating new pools if the number of pg it servers + exceeds the max-pg-per-osd setting, and it should resume the previously + suspended pg creations once the its pg number drops down below the setting + + How it works:: + 0. create 4 OSDs. + 1. create pool.a with pg_num=1, size=2 + pg will be mapped to osd.0, and osd.1, and it should be active+clean + 2. create pool.b with pg_num=1, size=2. 
+ if the pgs stuck in creating, delete the pool since the pool and try + again, eventually we'll get the pool to land on the other 2 osds that + aren't occupied by pool.a. (this will also verify that pgs for deleted + pools get cleaned out of the creating wait list.) + 3. mark an osd out. verify that some pgs get stuck stale or peering. + 4. delete a pool, verify pgs go active. + """ + pg_num = config.get('pg_num', 1) + from_primary = config.get('from_primary', True) + + manager = ctx.managers['ceph'] + log.info('1. creating pool.a') + pool_a = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + + log.info('2. creating pool.b') + while True: + pool_b = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created >= pg_num + pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating') + assert pg_pending == pg_num * 2 - pg_created + if pg_created == pg_num * 2: + break + manager.remove_pool(pool_b) + + log.info('3. mark an osd out') + pg_stats = manager.get_pg_stats() + pg = random.choice(pg_stats) + if from_primary: + victim = pg['acting'][-1] + else: + victim = pg['acting'][0] + manager.mark_out_osd(victim) + pg_states = manager.wait_till_pg_convergence(300) + pg_stuck = pg_num_in_any_state(pg_states, 'activating', 'stale', 'peering') + assert pg_stuck > 0 + + log.info('4. removing pool.b') + manager.remove_pool(pool_b) + manager.wait_for_clean(30) + + # cleanup + manager.remove_pool(pool_a) + + +def task(ctx, config): + assert isinstance(config, dict), \ + 'osd_max_pg_per_osd task only accepts a dict for config' + if config.get('test_create_from_mon', True): + test_create_from_mon(ctx, config) + else: + test_create_from_peer(ctx, config) diff --git a/qa/tasks/osd_recovery.py b/qa/tasks/osd_recovery.py new file mode 100644 index 000000000..b0623c21b --- /dev/null +++ b/qa/tasks/osd_recovery.py @@ -0,0 +1,193 @@ +""" +osd recovery +""" +import logging +import time +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def rados_start(testdir, remote, cmd): + """ + Run a remote rados command (currently used to only write data) + """ + log.info("rados %s" % ' '.join(cmd)) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=False, + ) + return proc + +def task(ctx, config): + """ + Test (non-backfill) recovery + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + testdir = teuthology.get_testdir(ctx) + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 3 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + # test some osdmap flags + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.raw_cluster_cmd('osd', 'set', 
'nodown') + manager.raw_cluster_cmd('osd', 'unset', 'noin') + manager.raw_cluster_cmd('osd', 'unset', 'noout') + manager.raw_cluster_cmd('osd', 'unset', 'noup') + manager.raw_cluster_cmd('osd', 'unset', 'nodown') + + # write some new data + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096', + '--no-cleanup']) + + time.sleep(15) + + # trigger a divergent target: + # blackhole + restart osd.1 (shorter log) + manager.blackhole_kill_osd(1) + # kill osd.2 (longer log... we'll make it divergent below) + manager.kill_osd(2) + time.sleep(2) + manager.revive_osd(1) + + # wait for our writes to complete + succeed + err = p.wait() + log.info('err is %d' % err) + + # cluster must repeer + manager.flush_pg_stats([0, 1]) + manager.wait_for_active_or_down() + + # write some more (make sure osd.2 really is divergent) + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096']) + p.wait() + + # revive divergent osd + manager.revive_osd(2) + + while len(manager.get_osd_status()['up']) < 3: + log.info('waiting a bit...') + time.sleep(2) + log.info('3 are up!') + + # cluster must recover + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + +def test_incomplete_pgs(ctx, config): + """ + Test handling of incomplete pgs. Requires 4 osds. + """ + testdir = teuthology.get_testdir(ctx) + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 4 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 4: + time.sleep(10) + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + log.info('Testing incomplete pgs...') + + for i in range(4): + manager.set_config( + i, + osd_recovery_delay_start=1000) + + # move data off of osd.0, osd.1 + manager.raw_cluster_cmd('osd', 'out', '0', '1') + manager.flush_pg_stats([0, 1, 2, 3], [0, 1]) + manager.wait_for_clean() + + # lots of objects in rbd (no pg log, will backfill) + p = rados_start(testdir, mon, + ['-p', 'rbd', 'bench', '20', 'write', '-b', '1', + '--no-cleanup']) + p.wait() + + # few objects in rbd pool (with pg log, normal recovery) + for f in range(1, 20): + p = rados_start(testdir, mon, ['-p', 'rbd', 'put', + 'foo.%d' % f, '/etc/passwd']) + p.wait() + + # move it back + manager.raw_cluster_cmd('osd', 'in', '0', '1') + manager.raw_cluster_cmd('osd', 'out', '2', '3') + time.sleep(10) + manager.flush_pg_stats([0, 1, 2, 3], [2, 3]) + time.sleep(10) + manager.wait_for_active() + + assert not manager.is_clean() + assert not manager.is_recovered() + + # kill 2 + 3 + log.info('stopping 2,3') + manager.kill_osd(2) + manager.kill_osd(3) + log.info('...') + manager.raw_cluster_cmd('osd', 'down', '2', '3') + manager.flush_pg_stats([0, 1]) + manager.wait_for_active_or_down() + + assert manager.get_num_down() > 0 + + # revive 2 + 3 + manager.revive_osd(2) + manager.revive_osd(3) + while len(manager.get_osd_status()['up']) < 4: + log.info('waiting a bit...') + time.sleep(2) + log.info('all are up!') + + for i in range(4): + manager.kick_recovery_wq(i) + + # cluster must recover + manager.wait_for_clean() diff --git a/qa/tasks/peer.py b/qa/tasks/peer.py new file mode 100644 index 000000000..6b19096b1 --- /dev/null +++ 
b/qa/tasks/peer.py @@ -0,0 +1,90 @@ +""" +Peer test (Single test, not much configurable here) +""" +import logging +import json +import time + +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test peering. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'peer task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + for i in range(3): + manager.set_config( + i, + osd_recovery_delay_start=120) + + # take on osd down + manager.kill_osd(2) + manager.mark_down_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-']) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # kill another and revive 2, so that some pgs can't peer. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.revive_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + + manager.wait_for_active_or_down() + + manager.flush_pg_stats([0, 2]) + + # look for down pgs + num_down_pgs = 0 + pgs = manager.get_pg_stats() + for pg in pgs: + out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query') + log.debug("out string %s",out) + j = json.loads(out) + log.info("pg is %s, query json is %s", pg, j) + + if pg['state'].count('down'): + num_down_pgs += 1 + # verify that it is blocked on osd.1 + rs = j['recovery_state'] + assert len(rs) >= 2 + assert rs[0]['name'] == 'Started/Primary/Peering/Down' + assert rs[1]['name'] == 'Started/Primary/Peering' + assert rs[1]['blocked'] + assert rs[1]['down_osds_we_would_probe'] == [1] + assert len(rs[1]['peering_blocked_by']) == 1 + assert rs[1]['peering_blocked_by'][0]['osd'] == 1 + + assert num_down_pgs > 0 + + # bring it all back + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() diff --git a/qa/tasks/peering_speed_test.py b/qa/tasks/peering_speed_test.py new file mode 100644 index 000000000..9dc658361 --- /dev/null +++ b/qa/tasks/peering_speed_test.py @@ -0,0 +1,87 @@ +""" +Remotely run peering tests. +""" +import logging +import time + +log = logging.getLogger(__name__) + +from teuthology.task.args import argify + +POOLNAME = "POOLNAME" +ARGS = [ + ('num_pgs', 'number of pgs to create', 256, int), + ('max_time', 'seconds to complete peering', 0, int), + ('runs', 'trials to run', 10, int), + ('num_objects', 'objects to create', 256 * 1024, int), + ('object_size', 'size in bytes for objects', 64, int), + ('creation_time_limit', 'time limit for pool population', 60*60, int), + ('create_threads', 'concurrent writes for create', 256, int) + ] + +def setup(ctx, config): + """ + Setup peering test on remotes. + """ + manager = ctx.managers['ceph'] + manager.clear_pools() + manager.create_pool(POOLNAME, config.num_pgs) + log.info("populating pool") + manager.rados_write_objects( + POOLNAME, + config.num_objects, + config.object_size, + config.creation_time_limit, + config.create_threads) + log.info("done populating pool") + +def do_run(ctx, config): + """ + Perform the test. 
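+
+    Marks osd.0 back in, writes roughly one object per pg, and records two
+    durations: the time until those writes complete ('time_to_active') and
+    the time until the cluster is clean again ('time_to_clean').  If
+    'max_time' is set, the run asserts that peering finished within it.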
+ """ + start = time.time() + # mark in osd + manager = ctx.managers['ceph'] + manager.mark_in_osd(0) + log.info("writing out objects") + manager.rados_write_objects( + POOLNAME, + config.num_pgs, # write 1 object per pg or so + 1, + config.creation_time_limit, + config.num_pgs, # lots of concurrency + cleanup = True) + peering_end = time.time() + + log.info("peering done, waiting on recovery") + manager.wait_for_clean() + + log.info("recovery done") + recovery_end = time.time() + if config.max_time: + assert(peering_end - start < config.max_time) + manager.mark_out_osd(0) + manager.wait_for_clean() + return { + 'time_to_active': peering_end - start, + 'time_to_clean': recovery_end - start + } + +@argify("peering_speed_test", ARGS) +def task(ctx, config): + """ + Peering speed test + """ + setup(ctx, config) + manager = ctx.managers['ceph'] + manager.mark_out_osd(0) + manager.wait_for_clean() + ret = [] + for i in range(config.runs): + log.info("Run {i}".format(i = i)) + ret.append(do_run(ctx, config)) + + manager.mark_in_osd(0) + ctx.summary['recovery_times'] = { + 'runs': ret + } diff --git a/qa/tasks/populate_rbd_pool.py b/qa/tasks/populate_rbd_pool.py new file mode 100644 index 000000000..76395eb68 --- /dev/null +++ b/qa/tasks/populate_rbd_pool.py @@ -0,0 +1,82 @@ +""" +Populate rbd pools +""" +import contextlib +import logging + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Populate <num_pools> pools with prefix <pool_prefix> with <num_images> + rbd images at <num_snaps> snaps + + The config could be as follows:: + + populate_rbd_pool: + client: <client> + pool_prefix: foo + num_pools: 5 + num_images: 10 + num_snaps: 3 + image_size: 10737418240 + """ + if config is None: + config = {} + client = config.get("client", "client.0") + pool_prefix = config.get("pool_prefix", "foo") + num_pools = config.get("num_pools", 2) + num_images = config.get("num_images", 20) + num_snaps = config.get("num_snaps", 4) + image_size = config.get("image_size", 100) + write_size = config.get("write_size", 1024*1024) + write_threads = config.get("write_threads", 10) + write_total_per_snap = config.get("write_total_per_snap", 1024*1024*30) + + (remote,) = ctx.cluster.only(client).remotes.keys() + + for poolid in range(num_pools): + poolname = "%s-%s" % (pool_prefix, str(poolid)) + log.info("Creating pool %s" % (poolname,)) + ctx.managers['ceph'].create_pool(poolname) + for imageid in range(num_images): + imagename = "rbd-%s" % (str(imageid),) + log.info("Creating imagename %s" % (imagename,)) + remote.run( + args = [ + "rbd", + "create", + imagename, + "--image-format", "1", + "--size", str(image_size), + "--pool", str(poolname)]) + def bench_run(): + remote.run( + args = [ + "rbd", + "bench-write", + imagename, + "--pool", poolname, + "--io-size", str(write_size), + "--io-threads", str(write_threads), + "--io-total", str(write_total_per_snap), + "--io-pattern", "rand"]) + log.info("imagename %s first bench" % (imagename,)) + bench_run() + for snapid in range(num_snaps): + snapname = "snap-%s" % (str(snapid),) + log.info("imagename %s creating snap %s" % (imagename, snapname)) + remote.run( + args = [ + "rbd", "snap", "create", + "--pool", poolname, + "--snap", snapname, + imagename + ]) + bench_run() + + try: + yield + finally: + log.info('done') diff --git a/qa/tasks/pykmip.py b/qa/tasks/pykmip.py new file mode 100644 index 000000000..45a5af689 --- /dev/null +++ b/qa/tasks/pykmip.py @@ -0,0 +1,465 @@ +""" +Deploy and configure PyKMIP for Teuthology +""" 
+import argparse +import contextlib +import logging +import time +import tempfile +import json +import os +from io import BytesIO +from teuthology.orchestra.daemon import DaemonGroup +from teuthology.orchestra.remote import Remote + +import pprint + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.packaging import install_package +from teuthology.packaging import remove_package +from teuthology.exceptions import ConfigError +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + + +def get_pykmip_dir(ctx): + return '{tdir}/pykmip'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_pykmip_dir(ctx, client, args, **kwargs): + (remote,) = [client] if isinstance(client,Remote) else ctx.cluster.only(client).remotes.keys() + return remote.run( + args=['cd', get_pykmip_dir(ctx), run.Raw('&&'), ] + args, + **kwargs + ) + +def run_in_pykmip_venv(ctx, client, args, **kwargs): + return run_in_pykmip_dir(ctx, client, + args = ['.', '.pykmipenv/bin/activate', + run.Raw('&&') + ] + args, **kwargs) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download PyKMIP from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading pykmip...') + pykmipdir = get_pykmip_dir(ctx) + + for (client, cconf) in config.items(): + branch = cconf.get('force-branch', 'master') + repo = cconf.get('force-repo', 'https://github.com/OpenKMIP/PyKMIP') + sha1 = cconf.get('sha1') + log.info("Using branch '%s' for pykmip", branch) + log.info('sha1=%s', sha1) + + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', '-b', branch, repo, + pykmipdir, + ], + ) + if sha1 is not None: + run_in_pykmip_dir(ctx, client, [ + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing pykmip...') + for client in config: + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', pykmipdir ], + ) + +_bindep_txt = """# should be part of PyKMIP +libffi-dev [platform:dpkg] +libffi-devel [platform:rpm] +libssl-dev [platform:dpkg] +openssl-devel [platform:redhat] +libopenssl-devel [platform:suse] +libsqlite3-dev [platform:dpkg] +sqlite-devel [platform:rpm] +python-dev [platform:dpkg] +python-devel [(platform:redhat platform:base-py2)] +python3-dev [platform:dpkg] +python3-devel [(platform:redhat platform:base-py3) platform:suse] +python3 [platform:suse] +""" + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Download the packaged dependencies of PyKMIP. + Remove install packages upon exit. + + The context passed in should be identical to the context + passed in to the main task. 
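The install_packages() helper (continued below) resolves distro packages by feeding the bindep profile above to 'bindep --brief --file'. A local sketch of that resolution step, assuming bindep is installed in the current environment rather than in the task's .pykmipenv virtualenv, might look like:

import subprocess
import tempfile

# shortened stand-in for the _bindep_txt profile shipped with the task
BINDEP_TXT = """libffi-dev [platform:dpkg]
libffi-devel [platform:rpm]
"""

def resolve_bindep_packages(bindep_txt=BINDEP_TXT):
    """Return the distro packages bindep reports as missing.

    Mirrors install_packages(): bindep reads the profile file and, with
    --brief, prints one package name per line; it exits non-zero when
    packages are missing, so the return code is ignored here as well.
    """
    with tempfile.NamedTemporaryFile('w', suffix='.txt', prefix='bindep-') as f:
        f.write(bindep_txt)
        f.flush()
        proc = subprocess.run(['bindep', '--brief', '--file', f.name],
                              capture_output=True, text=True)
    return proc.stdout.splitlines()

# requires bindep on PATH (e.g. pip install bindep):
# print(resolve_bindep_packages())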
+ """ + assert isinstance(config, dict) + log.info('Installing system dependenies for PyKMIP...') + + packages = {} + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + # use bindep to read which dependencies we need from temp/bindep.txt + fd, local_temp_path = tempfile.mkstemp(suffix='.txt', + prefix='bindep-') + os.write(fd, _bindep_txt.encode()) + os.close(fd) + fd, remote_temp_path = tempfile.mkstemp(suffix='.txt', + prefix='bindep-') + os.close(fd) + remote.put_file(local_temp_path, remote_temp_path) + os.remove(local_temp_path) + run_in_pykmip_venv(ctx, remote, ['pip', 'install', 'bindep']) + r = run_in_pykmip_venv(ctx, remote, + ['bindep', '--brief', '--file', remote_temp_path], + stdout=BytesIO(), + check_status=False) # returns 1 on success? + packages[client] = r.stdout.getvalue().decode().splitlines() + for dep in packages[client]: + install_package(dep, remote) + try: + yield + finally: + log.info('Removing system dependencies of PyKMIP...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for dep in packages[client]: + remove_package(dep, remote) + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for PyKMIP using pip. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for pykmip...') + for (client, _) in config.items(): + run_in_pykmip_dir(ctx, client, ['python3', '-m', 'venv', '.pykmipenv']) + run_in_pykmip_venv(ctx, client, ['pip', 'install', '--upgrade', 'pip']) + run_in_pykmip_venv(ctx, client, ['pip', 'install', 'pytz', '-e', get_pykmip_dir(ctx)]) + yield + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + r = get_remote_for_role(ctx, role) + role_endpoints[role] = r.ip_address, port, r.hostname + port += 1 + + return role_endpoints + +def copy_policy_json(ctx, cclient, cconfig): + run_in_pykmip_dir(ctx, cclient, + ['cp', + get_pykmip_dir(ctx)+'/examples/policy.json', + get_pykmip_dir(ctx)]) + +_pykmip_configuration = """# configuration for pykmip +[server] +hostname={ipaddr} +port={port} +certificate_path={servercert} +key_path={serverkey} +ca_path={clientca} +auth_suite=TLS1.2 +policy_path={confdir} +enable_tls_client_auth=False +tls_cipher_suites= + TLS_RSA_WITH_AES_128_CBC_SHA256 + TLS_RSA_WITH_AES_256_CBC_SHA256 + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 +logging_level=DEBUG +database_path={confdir}/pykmip.sqlite +[client] +host={hostname} +port=5696 +certfile={clientcert} +keyfile={clientkey} +ca_certs={clientca} +ssl_version=PROTOCOL_TLSv1_2 +""" + +def create_pykmip_conf(ctx, cclient, cconfig): + log.info('#0 cclient={} cconfig={}'.format(pprint.pformat(cclient),pprint.pformat(cconfig))) + (remote,) = ctx.cluster.only(cclient).remotes.keys() + pykmip_ipaddr, pykmip_port, pykmip_hostname = ctx.pykmip.endpoints[cclient] + log.info('#1 ip,p,h {} {} {}'.format(pykmip_ipaddr, pykmip_port, pykmip_hostname)) + clientca = cconfig.get('clientca', None) + log.info('#2 clientca {}'.format(clientca)) + serverkey = None + servercert = cconfig.get('servercert', None) + log.info('#3 servercert {}'.format(servercert)) + servercert = ctx.ssl_certificates.get(servercert) + log.info('#4 servercert {}'.format(servercert)) + clientkey = None + clientcert = cconfig.get('clientcert', None) + log.info('#3 clientcert 
{}'.format(clientcert)) + clientcert = ctx.ssl_certificates.get(clientcert) + log.info('#4 clientcert {}'.format(clientcert)) + clientca = ctx.ssl_certificates.get(clientca) + log.info('#5 clientca {}'.format(clientca)) + if servercert != None: + serverkey = servercert.key + servercert = servercert.certificate + log.info('#6 serverkey {} servercert {}'.format(serverkey, servercert)) + if clientcert != None: + clientkey = clientcert.key + clientcert = clientcert.certificate + log.info('#6 clientkey {} clientcert {}'.format(clientkey, clientcert)) + if clientca != None: + clientca = clientca.certificate + log.info('#7 clientca {}'.format(clientca)) + if servercert == None or clientca == None or serverkey == None: + log.info('#8 clientca {} serverkey {} servercert {}'.format(clientca, serverkey, servercert)) + raise ConfigError('pykmip: Missing/bad servercert or clientca') + pykmipdir = get_pykmip_dir(ctx) + kmip_conf = _pykmip_configuration.format( + ipaddr=pykmip_ipaddr, + port=pykmip_port, + confdir=pykmipdir, + hostname=pykmip_hostname, + clientca=clientca, + clientkey=clientkey, + clientcert=clientcert, + serverkey=serverkey, + servercert=servercert + ) + fd, local_temp_path = tempfile.mkstemp(suffix='.conf', + prefix='pykmip') + os.write(fd, kmip_conf.encode()) + os.close(fd) + remote.put_file(local_temp_path, pykmipdir+'/pykmip.conf') + os.remove(local_temp_path) + +@contextlib.contextmanager +def configure_pykmip(ctx, config): + """ + Configure pykmip paste-api and pykmip-api. + """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + copy_policy_json(ctx, cclient, cconfig) + create_pykmip_conf(ctx, cclient, cconfig) + try: + yield + finally: + pass + +def has_ceph_task(tasks): + for task in tasks: + for name, conf in task.items(): + if name == 'ceph': + return True + return False + +@contextlib.contextmanager +def run_pykmip(ctx, config): + assert isinstance(config, dict) + if hasattr(ctx, 'daemons'): + pass + elif has_ceph_task(ctx.config['tasks']): + log.info('Delay start pykmip so ceph can do once-only daemon logic') + try: + yield + finally: + pass + else: + ctx.daemons = DaemonGroup() + log.info('Running pykmip...') + + pykmipdir = get_pykmip_dir(ctx) + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'pykmip.public' + '.' + client_id + + run_cmd = 'cd ' + pykmipdir + ' && ' + \ + '. 
.pykmipenv/bin/activate && ' + \ + 'HOME={}'.format(pykmipdir) + ' && ' + \ + 'exec pykmip-server -f pykmip.conf -l ' + \ + pykmipdir + '/pykmip.log & { read; kill %1; }' + + ctx.daemons.add_daemon( + remote, 'pykmip', client_public_with_id, + cluster=cluster_name, + args=['bash', '-c', run_cmd], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=pykmipdir, + wait=False, + check_status=False, + ) + + # sleep driven synchronization + time.sleep(10) + try: + yield + finally: + log.info('Stopping PyKMIP instance') + ctx.daemons.get_daemon('pykmip', client_public_with_id, + cluster_name).stop() + +make_keys_template = """ +from kmip.pie import client +from kmip import enums +import ssl +import sys +import json +from io import BytesIO + +c = client.ProxyKmipClient(config_file="{replace-with-config-file-path}") + +rl=[] +for kwargs in {replace-with-secrets}: + with c: + key_id = c.create( + enums.CryptographicAlgorithm.AES, + 256, + operation_policy_name='default', + cryptographic_usage_mask=[ + enums.CryptographicUsageMask.ENCRYPT, + enums.CryptographicUsageMask.DECRYPT + ], + **kwargs + ) + c.activate(key_id) + attrs = c.get_attributes(uid=key_id) + r = {} + for a in attrs[1]: + r[str(a.attribute_name)] = str(a.attribute_value) + rl.append(r) +print(json.dumps(rl)) +""" + +@contextlib.contextmanager +def create_secrets(ctx, config): + """ + Create and activate any requested keys in kmip + """ + assert isinstance(config, dict) + + pykmipdir = get_pykmip_dir(ctx) + pykmip_conf_path = pykmipdir + '/pykmip.conf' + my_output = BytesIO() + for (client,cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + secrets=cconf.get('secrets') + if secrets: + secrets_json = json.dumps(cconf['secrets']) + make_keys = make_keys_template \ + .replace("{replace-with-secrets}",secrets_json) \ + .replace("{replace-with-config-file-path}",pykmip_conf_path) + my_output.truncate() + remote.run(args=[run.Raw('. 
cephtest/pykmip/.pykmipenv/bin/activate;' \ + + 'python')], stdin=make_keys, stdout = my_output) + ctx.pykmip.keys[client] = json.loads(my_output.getvalue().decode()) + try: + yield + finally: + pass + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure PyKMIP + + Example of configuration: + + tasks: + - install: + - ceph: + conf: + client: + rgw crypt s3 kms backend: kmip + rgw crypt kmip ca path: /home/ubuntu/cephtest/ca/kmiproot.crt + rgw crypt kmip client cert: /home/ubuntu/cephtest/ca/kmip-client.crt + rgw crypt kmip client key: /home/ubuntu/cephtest/ca/kmip-client.key + rgw crypt kmip kms key template: pykmip-$keyid + - openssl_keys: + kmiproot: + client: client.0 + cn: kmiproot + key-type: rsa:4096 + - openssl_keys: + kmip-server: + client: client.0 + ca: kmiproot + kmip-client: + client: client.0 + ca: kmiproot + cn: rgw-client + - pykmip: + client.0: + force-branch: master + clientca: kmiproot + servercert: kmip-server + clientcert: kmip-client + secrets: + - name: pykmip-key-1 + - name: pykmip-key-2 + - rgw: + client.0: + use-pykmip-role: client.0 + - s3tests: + client.0: + force-branch: master + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task pykmip only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('pykmip', {})) + + log.debug('PyKMIP config is %s', config) + + if not hasattr(ctx, 'ssl_certificates'): + raise ConfigError('pykmip must run after the openssl_keys task') + + + ctx.pykmip = argparse.Namespace() + ctx.pykmip.endpoints = assign_ports(ctx, config, 5696) + ctx.pykmip.keys = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: install_packages(ctx=ctx, config=config), + lambda: configure_pykmip(ctx=ctx, config=config), + lambda: run_pykmip(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/python.py b/qa/tasks/python.py new file mode 100644 index 000000000..4ddb14f71 --- /dev/null +++ b/qa/tasks/python.py @@ -0,0 +1,45 @@ +import logging +from teuthology import misc as teuthology +from tasks.vip import subst_vip + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Execute some python code. + + tasks: + - python: + host.a: | + import boto3 + c = boto3.resource(...) + + The provided dict is normally indexed by role. You can also include a + 'sudo: false' key to run the code without sudo. + + tasks: + - python: + sudo: false + host.b: | + import boto3 + c = boto3.resource(...) 
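The python task whose docstring appears above pipes the given snippet to python3 over stdin on the remote, with TESTDIR exported and an optional sudo prefix. A rough local equivalent, assuming a stock 'env' binary, skipping the subst_vip() substitution, and using an illustrative testdir path, is:

import subprocess

def run_python_snippet(code, testdir='/home/ubuntu/cephtest', sudo=True):
    """Run a snippet roughly the way the python task does, on the local host.

    The task sends the code to 'python3' over stdin with TESTDIR set; here
    'env' provides the variable locally and sudo is optional, as in the task.
    """
    args = ['env', 'TESTDIR={}'.format(testdir), 'python3']
    if sudo:
        args = ['sudo'] + args
    return subprocess.run(args, input=code, text=True, check=True)

# example (without sudo so it runs unattended):
# run_python_snippet("print('hello from the python task sketch')", sudo=False)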
+ """ + assert isinstance(config, dict), "task python got invalid config" + + testdir = teuthology.get_testdir(ctx) + + sudo = config.pop('sudo', True) + + for role, code in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running python on role %s host %s', role, remote.name) + log.info(code) + args=[ + 'TESTDIR={tdir}'.format(tdir=testdir), + 'python3', + ] + if sudo: + args = ['sudo'] + args + remote.run(args=args, stdin=subst_vip(ctx, code)) + diff --git a/qa/tasks/qemu.py b/qa/tasks/qemu.py new file mode 100644 index 000000000..6533026b4 --- /dev/null +++ b/qa/tasks/qemu.py @@ -0,0 +1,713 @@ +""" +Qemu task +""" + +import contextlib +import logging +import os +import yaml +import time + +from tasks import rbd +from tasks.util.workunit import get_refspec_after_overrides +from teuthology import contextutil +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.packaging import install_package, remove_package + +log = logging.getLogger(__name__) + +DEFAULT_NUM_DISKS = 2 +DEFAULT_IMAGE_URL = 'http://download.ceph.com/qa/ubuntu-12.04.qcow2' +DEFAULT_IMAGE_SIZE = 10240 # in megabytes +ENCRYPTION_HEADER_SIZE = 16 # in megabytes +DEFAULT_CPUS = 1 +DEFAULT_MEM = 4096 # in megabytes + +def normalize_disks(config): + # normalize the 'disks' parameter into a list of dictionaries + for client, client_config in config.items(): + clone = client_config.get('clone', False) + image_url = client_config.get('image_url', DEFAULT_IMAGE_URL) + device_type = client_config.get('type', 'filesystem') + encryption_format = client_config.get('encryption_format', 'none') + parent_encryption_format = client_config.get( + 'parent_encryption_format', 'none') + + disks = client_config.get('disks', DEFAULT_NUM_DISKS) + if not isinstance(disks, list): + disks = [{'image_name': '{client}.{num}'.format(client=client, + num=i)} + for i in range(int(disks))] + client_config['disks'] = disks + + for i, disk in enumerate(disks): + if 'action' not in disk: + disk['action'] = 'create' + assert disk['action'] in ['none', 'create', 'clone'], 'invalid disk action' + assert disk['action'] != 'clone' or 'parent_name' in disk, 'parent_name required for clone' + + if 'image_size' not in disk: + disk['image_size'] = DEFAULT_IMAGE_SIZE + disk['image_size'] = int(disk['image_size']) + + if 'image_url' not in disk and i == 0: + disk['image_url'] = image_url + + if 'device_type' not in disk: + disk['device_type'] = device_type + + disk['device_letter'] = chr(ord('a') + i) + + if 'encryption_format' not in disk: + if clone: + disk['encryption_format'] = parent_encryption_format + else: + disk['encryption_format'] = encryption_format + assert disk['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' + + assert disks, 'at least one rbd device must be used' + + if clone: + for disk in disks: + if disk['action'] != 'create': + continue + clone = dict(disk) + clone['action'] = 'clone' + clone['parent_name'] = clone['image_name'] + clone['image_name'] += '-clone' + del disk['device_letter'] + + clone['encryption_format'] = encryption_format + assert clone['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' + + clone['parent_encryption_format'] = parent_encryption_format + assert clone['parent_encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' + + disks.append(clone) + +def create_images(ctx, config, managers): + for client, client_config 
in config.items(): + disks = client_config['disks'] + for disk in disks: + if disk.get('action') != 'create' or ( + 'image_url' in disk and + disk['encryption_format'] == 'none'): + continue + image_size = disk['image_size'] + if disk['encryption_format'] != 'none': + image_size += ENCRYPTION_HEADER_SIZE + create_config = { + client: { + 'image_name': disk['image_name'], + 'image_format': 2, + 'image_size': image_size, + 'encryption_format': disk['encryption_format'], + } + } + managers.append( + lambda create_config=create_config: + rbd.create_image(ctx=ctx, config=create_config) + ) + +def create_clones(ctx, config, managers): + for client, client_config in config.items(): + disks = client_config['disks'] + for disk in disks: + if disk['action'] != 'clone': + continue + + create_config = { + client: { + 'image_name': disk['image_name'], + 'parent_name': disk['parent_name'], + 'encryption_format': disk['encryption_format'], + } + } + managers.append( + lambda create_config=create_config: + rbd.clone_image(ctx=ctx, config=create_config) + ) + +def create_encrypted_devices(ctx, config, managers): + for client, client_config in config.items(): + disks = client_config['disks'] + for disk in disks: + if (disk['encryption_format'] == 'none' and + disk.get('parent_encryption_format', 'none') == 'none') or \ + 'device_letter' not in disk: + continue + + dev_config = {client: disk} + managers.append( + lambda dev_config=dev_config: + rbd.dev_create(ctx=ctx, config=dev_config) + ) + +@contextlib.contextmanager +def create_dirs(ctx, config): + """ + Handle directory creation and cleanup + """ + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'install', '-d', '-m0755', '--', + '{tdir}/qemu'.format(tdir=testdir), + '{tdir}/archive/qemu'.format(tdir=testdir), + ] + ) + try: + yield + finally: + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true', + ] + ) + +@contextlib.contextmanager +def install_block_rbd_driver(ctx, config): + """ + Make sure qemu rbd block driver (block-rbd.so) is installed + """ + packages = {} + for client, _ in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + if remote.os.package_type == 'rpm': + packages[client] = ['qemu-kvm-block-rbd'] + else: + packages[client] = ['qemu-block-extra', 'qemu-utils'] + for pkg in packages[client]: + install_package(pkg, remote) + try: + yield + finally: + for client, _ in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for pkg in packages[client]: + remove_package(pkg, remote) + +@contextlib.contextmanager +def generate_iso(ctx, config): + """Execute system commands to generate iso""" + log.info('generating iso...') + testdir = teuthology.get_testdir(ctx) + + # use ctx.config instead of config, because config has been + # through teuthology.replace_all_with_clients() + refspec = get_refspec_after_overrides(ctx.config, {}) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + log.info('Pulling tests from %s ref %s', git_url, refspec) + + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + test = client_config['test'] + + (remote,) = 
ctx.cluster.only(client).remotes.keys() + + clone_dir = '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=client) + remote.run(args=refspec.clone(git_url, clone_dir)) + + src_dir = os.path.dirname(__file__) + userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client) + metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client) + + with open(os.path.join(src_dir, 'userdata_setup.yaml')) as f: + test_setup = ''.join(f.readlines()) + # configuring the commands to setup the nfs mount + mnt_dir = "/export/{client}".format(client=client) + test_setup = test_setup.format( + mnt_dir=mnt_dir + ) + + with open(os.path.join(src_dir, 'userdata_teardown.yaml')) as f: + test_teardown = ''.join(f.readlines()) + + user_data = test_setup + + disks = client_config['disks'] + for disk in disks: + if disk['device_type'] != 'filesystem' or \ + 'device_letter' not in disk or \ + 'image_url' in disk: + continue + if disk['encryption_format'] == 'none' and \ + disk.get('parent_encryption_format', 'none') == 'none': + dev_name = 'vd' + disk['device_letter'] + else: + # encrypted disks use if=ide interface, instead of if=virtio + dev_name = 'sd' + disk['device_letter'] + user_data += """ +- | + #!/bin/bash + mkdir /mnt/test_{dev_name} + mkfs -t xfs /dev/{dev_name} + mount -t xfs /dev/{dev_name} /mnt/test_{dev_name} +""".format(dev_name=dev_name) + + user_data += """ +- | + #!/bin/bash + test -d /etc/ceph || mkdir /etc/ceph + cp /mnt/cdrom/ceph.* /etc/ceph/ +""" + + cloud_config_archive = client_config.get('cloud_config_archive', []) + if cloud_config_archive: + user_data += yaml.safe_dump(cloud_config_archive, default_style='|', + default_flow_style=False) + + # this may change later to pass the directories as args to the + # script or something. xfstests needs that. + user_data += """ +- | + #!/bin/bash + test -d /mnt/test_b && cd /mnt/test_b + /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success +""" + test_teardown + + user_data = user_data.format( + ceph_branch=ctx.config.get('branch'), + ceph_sha1=ctx.config.get('sha1')) + remote.write_file(userdata_path, user_data) + + with open(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f: + remote.write_file(metadata_path, f) + + test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client) + + log.info('fetching test %s for %s', test, client) + remote.run( + args=[ + 'cp', '--', os.path.join(clone_dir, test), test_file, + run.Raw('&&'), + 'chmod', '755', test_file, + ], + ) + remote.run( + args=[ + 'genisoimage', '-quiet', '-input-charset', 'utf-8', + '-volid', 'cidata', '-joliet', '-rock', + '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + '-graft-points', + 'user-data={userdata}'.format(userdata=userdata_path), + 'meta-data={metadata}'.format(metadata=metadata_path), + 'ceph.conf=/etc/ceph/ceph.conf', + 'ceph.keyring=/etc/ceph/ceph.keyring', + 'test.sh={file}'.format(file=test_file), + ], + ) + try: + yield + finally: + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', '-rf', + '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + os.path.join(testdir, 'qemu', 'userdata.' + client), + os.path.join(testdir, 'qemu', 'metadata.' 
+ client), + '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client), + '{tdir}/qemu_clone.{client}'.format(tdir=testdir, client=client), + ], + ) + +@contextlib.contextmanager +def download_image(ctx, config): + """Downland base image, remove image file when done""" + log.info('downloading base image') + testdir = teuthology.get_testdir(ctx) + + client_base_files = {} + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + client_base_files[client] = [] + disks = client_config['disks'] + for disk in disks: + if disk['action'] != 'create' or 'image_url' not in disk: + continue + + base_file = '{tdir}/qemu/base.{name}.qcow2'.format(tdir=testdir, + name=disk['image_name']) + client_base_files[client].append(base_file) + + remote.run( + args=[ + 'wget', '-nv', '-O', base_file, disk['image_url'], + ] + ) + + if disk['encryption_format'] == 'none': + remote.run( + args=[ + 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', + base_file, 'rbd:rbd/{image_name}'.format(image_name=disk['image_name']) + ] + ) + else: + dev_config = {client: {'image_name': disk['image_name'], + 'encryption_format': disk['encryption_format']}} + raw_file = '{tdir}/qemu/base.{name}.raw'.format( + tdir=testdir, name=disk['image_name']) + client_base_files[client].append(raw_file) + remote.run( + args=[ + 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', + base_file, raw_file + ] + ) + with rbd.dev_create(ctx, dev_config): + remote.run( + args=[ + 'dd', 'if={name}'.format(name=raw_file), + 'of={name}'.format(name=dev_config[client]['device_path']), + 'bs=4M', 'conv=fdatasync' + ] + ) + + for disk in disks: + if disk['action'] == 'clone' or \ + disk['encryption_format'] != 'none' or \ + (disk['action'] == 'create' and 'image_url' not in disk): + continue + + remote.run( + args=[ + 'rbd', 'resize', + '--size={image_size}M'.format(image_size=disk['image_size']), + disk['image_name'], run.Raw('||'), 'true' + ] + ) + + try: + yield + finally: + log.debug('cleaning up base image files') + for client, base_files in client_base_files.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for base_file in base_files: + remote.run( + args=[ + 'rm', '-f', base_file, + ], + ) + + +def _setup_nfs_mount(remote, client, service_name, mount_dir): + """ + Sets up an nfs mount on the remote that the guest can use to + store logs. This nfs mount is also used to touch a file + at the end of the test to indicate if the test was successful + or not. 
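For an unencrypted disk with an image_url, download_image() above fetches the qcow2 base image and converts it straight into the 'rbd' pool. The sketch below simply rebuilds those two command lines; the testdir default and the example image name follow the conventions used elsewhere in the task and are otherwise assumptions:

def base_image_import_cmds(image_url, image_name, testdir='/home/ubuntu/cephtest'):
    """Return the commands download_image() issues for an unencrypted disk.

    The qcow2 base image is fetched once, then qemu-img converts it directly
    into the 'rbd' pool; path layout and pool name follow the task above.
    """
    base_file = '{tdir}/qemu/base.{name}.qcow2'.format(tdir=testdir, name=image_name)
    return [
        ['wget', '-nv', '-O', base_file, image_url],
        ['qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw',
         base_file, 'rbd:rbd/{name}'.format(name=image_name)],
    ]

# example using the task's default image URL and default image naming scheme
for cmd in base_image_import_cmds(
        'http://download.ceph.com/qa/ubuntu-12.04.qcow2', 'client.0.0'):
    print(' '.join(cmd))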
+ """ + export_dir = "/export/{client}".format(client=client) + log.info("Creating the nfs export directory...") + remote.run(args=[ + 'sudo', 'mkdir', '-p', export_dir, + ]) + log.info("Mounting the test directory...") + remote.run(args=[ + 'sudo', 'mount', '--bind', mount_dir, export_dir, + ]) + log.info("Adding mount to /etc/exports...") + export = "{dir} *(rw,no_root_squash,no_subtree_check,insecure)".format( + dir=export_dir + ) + log.info("Deleting export from /etc/exports...") + remote.run(args=[ + 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), + '/etc/exports' + ]) + remote.run(args=[ + 'echo', export, run.Raw("|"), + 'sudo', 'tee', '-a', "/etc/exports", + ]) + log.info("Restarting NFS...") + if remote.os.package_type == "deb": + remote.run(args=['sudo', 'service', 'nfs-kernel-server', 'restart']) + else: + remote.run(args=['sudo', 'systemctl', 'restart', service_name]) + + +def _teardown_nfs_mount(remote, client, service_name): + """ + Tears down the nfs mount on the remote used for logging and reporting the + status of the tests being ran in the guest. + """ + log.info("Tearing down the nfs mount for {remote}".format(remote=remote)) + export_dir = "/export/{client}".format(client=client) + log.info("Stopping NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'stop' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'stop', service_name + ]) + log.info("Unmounting exported directory...") + remote.run(args=[ + 'sudo', 'umount', export_dir + ]) + log.info("Deleting export from /etc/exports...") + remote.run(args=[ + 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), + '/etc/exports' + ]) + log.info("Starting NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'start' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'start', service_name + ]) + + +@contextlib.contextmanager +def run_qemu(ctx, config): + """Setup kvm environment and start qemu""" + procs = [] + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client) + remote.run( + args=[ + 'mkdir', log_dir, run.Raw('&&'), + 'sudo', 'modprobe', 'kvm', + ] + ) + + nfs_service_name = 'nfs' + if remote.os.name in ['rhel', 'centos'] and float(remote.os.version) >= 8: + nfs_service_name = 'nfs-server' + + # make an nfs mount to use for logging and to + # allow to test to tell teuthology the tests outcome + _setup_nfs_mount(remote, client, nfs_service_name, log_dir) + + # Hack to make sure /dev/kvm permissions are set correctly + # See http://tracker.ceph.com/issues/17977 and + # https://bugzilla.redhat.com/show_bug.cgi?id=1333159 + remote.run(args='sudo udevadm control --reload') + remote.run(args='sudo udevadm trigger /dev/kvm') + remote.run(args='ls -l /dev/kvm') + + qemu_cmd = 'qemu-system-x86_64' + if remote.os.package_type == "rpm": + qemu_cmd = "/usr/libexec/qemu-kvm" + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + qemu_cmd, '-enable-kvm', '-nographic', '-cpu', 'host', + '-smp', str(client_config.get('cpus', DEFAULT_CPUS)), + '-m', str(client_config.get('memory', DEFAULT_MEM)), + # cd holding metadata for cloud-init + '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + ] + + cachemode = 'none' + 
ceph_config = ctx.ceph['ceph'].conf.get('global', {}) + ceph_config.update(ctx.ceph['ceph'].conf.get('client', {})) + ceph_config.update(ctx.ceph['ceph'].conf.get(client, {})) + if ceph_config.get('rbd cache', True): + if ceph_config.get('rbd cache max dirty', 1) > 0: + cachemode = 'writeback' + else: + cachemode = 'writethrough' + + disks = client_config['disks'] + for disk in disks: + if 'device_letter' not in disk: + continue + + if disk['encryption_format'] == 'none' and \ + disk.get('parent_encryption_format', 'none') == 'none': + interface = 'virtio' + disk_spec = 'rbd:rbd/{img}:id={id}'.format( + img=disk['image_name'], + id=client[len('client.'):] + ) + else: + # encrypted disks use ide as a temporary workaround for + # a bug in qemu when using virtio over nbd + # TODO: use librbd encryption directly via qemu (not via nbd) + interface = 'ide' + disk_spec = disk['device_path'] + + args.extend([ + '-drive', + 'file={disk_spec},format=raw,if={interface},cache={cachemode}'.format( + disk_spec=disk_spec, + interface=interface, + cachemode=cachemode, + ), + ]) + time_wait = client_config.get('time_wait', 0) + + log.info('starting qemu...') + procs.append( + remote.run( + args=args, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + ) + + try: + yield + finally: + log.info('waiting for qemu tests to finish...') + run.wait(procs) + + if time_wait > 0: + log.debug('waiting {time_wait} sec for workloads detect finish...'.format( + time_wait=time_wait)); + time.sleep(time_wait) + + log.debug('checking that qemu tests succeeded...') + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + # ensure we have permissions to all the logs + log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, + client=client) + remote.run( + args=[ + 'sudo', 'chmod', 'a+rw', '-R', log_dir + ] + ) + + # teardown nfs mount + _teardown_nfs_mount(remote, client, nfs_service_name) + # check for test status + remote.run( + args=[ + 'test', '-f', + '{tdir}/archive/qemu/{client}/success'.format( + tdir=testdir, + client=client + ), + ], + ) + log.info("Deleting exported directory...") + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run(args=[ + 'sudo', 'rm', '-r', '/export' + ]) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run a test inside of QEMU on top of rbd. Only one test + is supported per client. + + For example, you can specify which clients to run on:: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + client.1: + test: http://download.ceph.com/qa/test2.sh + + Or use the same settings on all clients: + + tasks: + - ceph: + - qemu: + all: + test: http://download.ceph.com/qa/test.sh + + For tests that want to explicitly describe the RBD images to connect: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + clone: True/False (optionally clone all created disks), + image_url: <URL> (optional default image URL) + type: filesystem / block (optional default device type) + disks: [ + { + action: create / clone / none (optional, defaults to create) + image_name: <image name> (optional) + parent_name: <parent_name> (if action == clone), + type: filesystem / block (optional, defaults to fileystem) + image_url: <URL> (optional), + image_size: <MiB> (optional) + encryption_format: luks1 / luks2 / none (optional, defaults to none) + }, ... 
+ ] + + You can set the amount of CPUs and memory the VM has (default is 1 CPU and + 4096 MB):: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + cpus: 4 + memory: 512 # megabytes + + If you need to configure additional cloud-config options, set cloud_config + to the required data set:: + + tasks: + - ceph + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + cloud_config_archive: + - | + #/bin/bash + touch foo1 + - content: | + test data + type: text/plain + filename: /tmp/data + """ + assert isinstance(config, dict), \ + "task qemu only supports a dictionary for configuration" + + config = teuthology.replace_all_with_clients(ctx.cluster, config) + normalize_disks(config) + + managers = [] + create_images(ctx=ctx, config=config, managers=managers) + managers.extend([ + lambda: create_dirs(ctx=ctx, config=config), + lambda: install_block_rbd_driver(ctx=ctx, config=config), + lambda: generate_iso(ctx=ctx, config=config), + lambda: download_image(ctx=ctx, config=config), + ]) + create_clones(ctx=ctx, config=config, managers=managers) + create_encrypted_devices(ctx=ctx, config=config, managers=managers) + managers.append( + lambda: run_qemu(ctx=ctx, config=config), + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/rabbitmq.py b/qa/tasks/rabbitmq.py new file mode 100644 index 000000000..c78ac1e56 --- /dev/null +++ b/qa/tasks/rabbitmq.py @@ -0,0 +1,130 @@ +""" +Deploy and configure RabbitMQ for Teuthology +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def install_rabbitmq(ctx, config): + """ + Downloading the RabbitMQ package. + """ + assert isinstance(config, dict) + log.info('Installing RabbitMQ...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'install', 'epel-release' + ]) + + link1 = 'https://packagecloud.io/install/repositories/rabbitmq/erlang/script.rpm.sh' + + ctx.cluster.only(client).run(args=[ + 'curl', '-s', link1, run.Raw('|'), 'sudo', 'bash' + ]) + + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'install', 'erlang' + ]) + + link2 = 'https://packagecloud.io/install/repositories/rabbitmq/rabbitmq-server/script.rpm.sh' + + ctx.cluster.only(client).run(args=[ + 'curl', '-s', link2, run.Raw('|'), 'sudo', 'bash' + ]) + + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'install', 'rabbitmq-server' + ]) + + try: + yield + finally: + log.info('Removing packaged dependencies of RabbitMQ...') + + for (client, _) in config.items(): + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'remove', 'rabbitmq-server.noarch' + ]) + + +@contextlib.contextmanager +def run_rabbitmq(ctx, config): + """ + This includes two parts: + 1. Starting Daemon + 2. 
Starting RabbitMQ service + """ + assert isinstance(config, dict) + log.info('Bringing up Daemon and RabbitMQ service...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run(args=[ + 'sudo', 'chkconfig', 'rabbitmq-server', 'on' + ], + ) + + ctx.cluster.only(client).run(args=[ + 'sudo', '/sbin/service', 'rabbitmq-server', 'start' + ], + ) + + ''' + # To check whether rabbitmq-server is running or not + ctx.cluster.only(client).run(args=[ + 'sudo', '/sbin/service', 'rabbitmq-server', 'status' + ], + ) + ''' + + try: + yield + finally: + log.info('Stopping RabbitMQ Service...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run(args=[ + 'sudo', '/sbin/service', 'rabbitmq-server', 'stop' + ], + ) + + +@contextlib.contextmanager +def task(ctx,config): + """ + To run rabbitmq the prerequisite is to run the tox task. Following is the way how to run + tox and then rabbitmq:: + tasks: + - rabbitmq: + client.0: + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task rabbitmq only supports a list or dictionary for configuration" + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('RabbitMQ config is %s', config) + + with contextutil.nested( + lambda: install_rabbitmq(ctx=ctx, config=config), + lambda: run_rabbitmq(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/rados.py b/qa/tasks/rados.py new file mode 100644 index 000000000..a730a7299 --- /dev/null +++ b/qa/tasks/rados.py @@ -0,0 +1,286 @@ +""" +Rados modle-based integration tests +""" +import contextlib +import logging +import gevent +from teuthology import misc as teuthology + + +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run RadosModel-based integration tests. + + The config should be as follows:: + + rados: + clients: [client list] + ops: <number of ops> + objects: <number of objects to use> + max_in_flight: <max number of operations in flight> + object_size: <size of objects in bytes> + min_stride_size: <minimum write stride size in bytes> + max_stride_size: <maximum write stride size in bytes> + op_weights: <dictionary mapping operation type to integer weight> + runs: <number of times to run> - the pool is remade between runs + ec_pool: use an ec pool + erasure_code_profile: profile to use with the erasure coded pool + fast_read: enable ec_pool's fast_read + min_size: set the min_size of created pool + pool_snaps: use pool snapshots instead of selfmanaged snapshots + write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED. + This mean data don't access in the near future. + Let osd backend don't keep data in cache. 
+ + For example:: + + tasks: + - ceph: + - rados: + clients: [client.0] + ops: 1000 + max_seconds: 0 # 0 for no limit + objects: 25 + max_in_flight: 16 + object_size: 4000000 + min_stride_size: 1024 + max_stride_size: 4096 + op_weights: + read: 20 + write: 10 + delete: 2 + snap_create: 3 + rollback: 2 + snap_remove: 0 + ec_pool: create an ec pool, defaults to False + erasure_code_use_overwrites: test overwrites, default false + erasure_code_profile: + name: teuthologyprofile + k: 2 + m: 1 + crush-failure-domain: osd + pool_snaps: true + write_fadvise_dontneed: true + runs: 10 + - interactive: + + Optionally, you can provide the pool name to run against: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo + - rados: + clients: [client.0] + pools: [foo] + ... + + Alternatively, you can provide a pool prefix: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo.client.0 + - rados: + clients: [client.0] + pool_prefix: foo + ... + + The tests are run asynchronously, they are not complete when the task + returns. For instance: + + - rados: + clients: [client.0] + pools: [ecbase] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + - print: "**** done rados ec-cache-agent (part 2)" + + will run the print task immediately after the rados tasks begins but + not after it completes. To make the rados task a blocking / sequential + task, use: + + - sequential: + - rados: + clients: [client.0] + pools: [ecbase] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + - print: "**** done rados ec-cache-agent (part 2)" + + """ + log.info('Beginning rados...') + assert isinstance(config, dict), \ + "please list clients to run on" + + object_size = int(config.get('object_size', 4000000)) + op_weights = config.get('op_weights', {}) + testdir = teuthology.get_testdir(ctx) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph_test_rados'] + if config.get('ec_pool', False): + args.extend(['--no-omap']) + if not config.get('erasure_code_use_overwrites', False): + args.extend(['--ec-pool']) + if config.get('write_fadvise_dontneed', False): + args.extend(['--write-fadvise-dontneed']) + if config.get('set_redirect', False): + args.extend(['--set_redirect']) + if config.get('set_chunk', False): + args.extend(['--set_chunk']) + if config.get('enable_dedup', False): + args.extend(['--enable_dedup']) + if config.get('low_tier_pool', None): + args.extend(['--low_tier_pool', config.get('low_tier_pool', None)]) + if config.get('dedup_chunk_size', False): + args.extend(['--dedup_chunk_size', config.get('dedup_chunk_size', None)] ) + if config.get('dedup_chunk_algo', False): + args.extend(['--dedup_chunk_algo', config.get('dedup_chunk_algo', None)]) + if config.get('pool_snaps', False): + args.extend(['--pool-snaps']) + if config.get('balance_reads', False): + args.extend(['--balance-reads']) + if config.get('localize_reads', False): + args.extend(['--localize-reads']) + args.extend([ + '--max-ops', str(config.get('ops', 10000)), + '--objects', str(config.get('objects', 500)), + '--max-in-flight', str(config.get('max_in_flight', 16)), + '--size', str(object_size), + '--min-stride-size', str(config.get('min_stride_size', object_size // 10)), + '--max-stride-size', str(config.get('max_stride_size', object_size // 5)), + '--max-seconds', str(config.get('max_seconds', 0)) + ]) + + weights = {} + weights['read'] = 100 + weights['write'] = 100 + 
weights['delete'] = 10 + # Parallel of the op_types in test/osd/TestRados.cc + for field in [ + # read handled above + # write handled above + # delete handled above + "snap_create", + "snap_remove", + "rollback", + "setattr", + "rmattr", + "watch", + "copy_from", + "hit_set_list", + "is_dirty", + "undirty", + "cache_flush", + "cache_try_flush", + "cache_evict", + "append", + "write", + "read", + "delete", + "set_chunk", + "tier_promote", + "tier_evict", + "tier_promote", + "tier_flush" + ]: + if field in op_weights: + weights[field] = op_weights[field] + + if config.get('write_append_excl', True): + if 'write' in weights: + weights['write'] = weights['write'] // 2 + weights['write_excl'] = weights['write'] + + if 'append' in weights: + weights['append'] = weights['append'] // 2 + weights['append_excl'] = weights['append'] + + for op, weight in weights.items(): + args.extend([ + '--op', op, str(weight) + ]) + + + def thread(): + """Thread spawned by gevent""" + clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + log.info('clients are %s' % clients) + manager = ctx.managers['ceph'] + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + for i in range(int(config.get('runs', '1'))): + log.info("starting run %s out of %s", str(i), config.get('runs', '1')) + tests = {} + existing_pools = config.get('pools', []) + created_pools = [] + for role in config.get('clients', clients): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + + pool = config.get('pool', None) + if not pool and existing_pools: + pool = existing_pools.pop() + else: + pool = manager.create_pool_with_unique_name( + erasure_code_profile_name=profile_name, + erasure_code_use_overwrites= + config.get('erasure_code_use_overwrites', False) + ) + created_pools.append(pool) + if config.get('fast_read', False): + manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool, 'fast_read', 'true') + min_size = config.get('min_size', None); + if min_size is not None: + manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool, 'min_size', str(min_size)) + + (remote,) = ctx.cluster.only(role).remotes.keys() + proc = remote.run( + args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args + + ["--pool", pool], + logger=log.getChild("rados.{id}".format(id=id_)), + stdin=run.PIPE, + wait=False + ) + tests[id_] = proc + run.wait(tests.values()) + + for pool in created_pools: + manager.wait_snap_trimming_complete(pool); + manager.remove_pool(pool) + + running = gevent.spawn(thread) + + try: + yield + finally: + log.info('joining rados') + running.get() diff --git a/qa/tasks/radosbench.py b/qa/tasks/radosbench.py new file mode 100644 index 000000000..3a5aee2e2 --- /dev/null +++ b/qa/tasks/radosbench.py @@ -0,0 +1,144 @@ +""" +Rados benchmarking +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run radosbench + + The config should be as follows: + + radosbench: + clients: [client list] + time: <seconds to run> + pool: <pool to use> + size: write size to use + concurrency: max number of outstanding writes (16) + objectsize: object size to use + unique_pool: use a unique pool, defaults to False + ec_pool: 
create an ec pool, defaults to False + create_pool: create pool, defaults to True + erasure_code_profile: + name: teuthologyprofile + k: 2 + m: 1 + crush-failure-domain: osd + cleanup: false (defaults to true) + type: <write|seq|rand> (defaults to write) + example: + + tasks: + - ceph: + - radosbench: + clients: [client.0] + time: 360 + - interactive: + """ + log.info('Beginning radosbench...') + assert isinstance(config, dict), \ + "please list clients to run on" + radosbench = {} + + testdir = teuthology.get_testdir(ctx) + manager = ctx.managers['ceph'] + runtype = config.get('type', 'write') + + create_pool = config.get('create_pool', True) + for role in config.get( + 'clients', + list(map(lambda x: 'client.' + x, + teuthology.all_roles_of_type(ctx.cluster, 'client')))): + assert isinstance(role, str) + (_, id_) = role.split('.', 1) + (remote,) = ctx.cluster.only(role).remotes.keys() + + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + + cleanup = [] + if not config.get('cleanup', True): + cleanup = ['--no-cleanup'] + write_to_omap = [] + if config.get('write-omap', False): + write_to_omap = ['--write-omap'] + log.info('omap writes') + + pool = config.get('pool', 'data') + if create_pool: + if pool != 'data': + manager.create_pool(pool, erasure_code_profile_name=profile_name) + else: + pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) + + concurrency = config.get('concurrency', 16) + osize = config.get('objectsize', 65536) + if osize == 0: + objectsize = [] + else: + objectsize = ['--object-size', str(osize)] + size = ['-b', str(config.get('size', 65536))] + # If doing a reading run then populate data + if runtype != "write": + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role] + + ['-t', str(concurrency)] + + size + objectsize + + ['-p' , pool, + 'bench', str(60), "write", "--no-cleanup" + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + wait=True + ) + size = [] + objectsize = [] + + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role] + + size + objectsize + + ['-p' , pool, + 'bench', str(config.get('time', 360)), runtype, + ] + write_to_omap + cleanup).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + radosbench[id_] = proc + + try: + yield + finally: + timeout = config.get('time', 360) * 30 + 300 + log.info('joining radosbench (timing out after %ss)', timeout) + run.wait(radosbench.values(), timeout=timeout) + + if pool != 'data' and create_pool: + manager.remove_pool(pool) diff --git a/qa/tasks/radosbenchsweep.py b/qa/tasks/radosbenchsweep.py new file mode 100644 index 000000000..df0ba1ed1 --- /dev/null +++ b/qa/tasks/radosbenchsweep.py @@ -0,0 +1,222 @@ +""" +Rados benchmarking sweep +""" +import contextlib +import logging +import re + +from io import BytesIO +from itertools import product + +from teuthology.orchestra import run +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute a radosbench parameter sweep 
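The radosbench task above ultimately shells out to 'rados ... bench'. The helper below approximates the argument list it builds, keeping only the options visible in the task (block size, object size, concurrency, --no-cleanup) and dropping the adjust-ulimits/ceph-coverage wrappers and the --name credential; treat it as a sketch, not the exact command:

def radosbench_args(pool, seconds=360, op='write', size=65536,
                    objectsize=65536, concurrency=16, cleanup=True):
    """Approximate the 'rados bench' argument list assembled by the task.

    Defaults mirror the task's defaults; an objectsize of 0 omits the
    --object-size option, as in the task.
    """
    args = ['rados', '--no-log-to-stderr', '-t', str(concurrency),
            '-b', str(size)]
    if objectsize:
        args += ['--object-size', str(objectsize)]
    args += ['-p', pool, 'bench', str(seconds), op]
    if not cleanup:
        args.append('--no-cleanup')
    return args

print(' '.join(radosbench_args('data', seconds=60, cleanup=False)))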
+ + Puts radosbench in a loop, taking values from the given config at each + iteration. If given, the min and max values below create a range, e.g. + min_replicas=1 and max_replicas=3 implies executing with 1-3 replicas. + + Parameters: + + clients: [client list] + time: seconds to run (default=120) + sizes: [list of object sizes] (default=[4M]) + mode: <write|read|seq> (default=write) + repetitions: execute the same configuration multiple times (default=1) + min_num_replicas: minimum number of replicas to use (default = 3) + max_num_replicas: maximum number of replicas to use (default = 3) + min_num_osds: the minimum number of OSDs in a pool (default=all) + max_num_osds: the maximum number of OSDs in a pool (default=all) + file: name of CSV-formatted output file (default='radosbench.csv') + columns: columns to include (default=all) + - rep: execution number (takes values from 'repetitions') + - num_osd: number of osds for pool + - num_replica: number of replicas + - avg_throughput: throughput + - avg_latency: latency + - stdev_throughput: + - stdev_latency: + + Example: + - radsobenchsweep: + columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput] + """ + log.info('Beginning radosbenchsweep...') + assert isinstance(config, dict), 'expecting dictionary for configuration' + + # get and validate config values + # { + + # only one client supported for now + if len(config.get('clients', [])) != 1: + raise Exception("Only one client can be specified") + + # only write mode + if config.get('mode', 'write') != 'write': + raise Exception("Only 'write' mode supported for now.") + + # OSDs + total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd') + min_num_osds = config.get('min_num_osds', total_osds_in_cluster) + max_num_osds = config.get('max_num_osds', total_osds_in_cluster) + + if max_num_osds > total_osds_in_cluster: + raise Exception('max_num_osds cannot be greater than total in cluster') + if min_num_osds < 1: + raise Exception('min_num_osds cannot be less than 1') + if min_num_osds > max_num_osds: + raise Exception('min_num_osds cannot be greater than max_num_osd') + osds = range(0, (total_osds_in_cluster + 1)) + + # replicas + min_num_replicas = config.get('min_num_replicas', 3) + max_num_replicas = config.get('max_num_replicas', 3) + + if min_num_replicas < 1: + raise Exception('min_num_replicas cannot be less than 1') + if min_num_replicas > max_num_replicas: + raise Exception('min_num_replicas cannot be greater than max_replicas') + if max_num_replicas > max_num_osds: + raise Exception('max_num_replicas cannot be greater than max_num_osds') + replicas = range(min_num_replicas, (max_num_replicas + 1)) + + # object size + sizes = config.get('size', [4 << 20]) + + # repetitions + reps = range(config.get('repetitions', 1)) + + # file + fname = config.get('file', 'radosbench.csv') + f = open('{}/{}'.format(ctx.archive, fname), 'w') + f.write(get_csv_header(config) + '\n') + # } + + # set default pools size=1 to avoid 'unhealthy' issues + ctx.manager.set_pool_property('data', 'size', 1) + ctx.manager.set_pool_property('metadata', 'size', 1) + ctx.manager.set_pool_property('rbd', 'size', 1) + + current_osds_out = 0 + + # sweep through all parameters + for osds_out, size, replica, rep in product(osds, sizes, replicas, reps): + + osds_in = total_osds_in_cluster - osds_out + + if osds_in == 0: + # we're done + break + + if current_osds_out != osds_out: + # take an osd out + ctx.manager.raw_cluster_cmd( + 'osd', 'reweight', str(osds_out-1), '0.0') + 
wait_until_healthy(ctx, config) + current_osds_out = osds_out + + if osds_in not in range(min_num_osds, (max_num_osds + 1)): + # no need to execute with a number of osds that wasn't requested + continue + + if osds_in < replica: + # cannot execute with more replicas than available osds + continue + + run_radosbench(ctx, config, f, osds_in, size, replica, rep) + + f.close() + + yield + + +def get_csv_header(conf): + all_columns = [ + 'rep', 'num_osd', 'num_replica', 'avg_throughput', + 'avg_latency', 'stdev_throughput', 'stdev_latency' + ] + given_columns = conf.get('columns', None) + if given_columns and len(given_columns) != 0: + for column in given_columns: + if column not in all_columns: + raise Exception('Unknown column ' + column) + return ','.join(conf['columns']) + else: + conf['columns'] = all_columns + return ','.join(all_columns) + + +def run_radosbench(ctx, config, f, num_osds, size, replica, rep): + pool = ctx.manager.create_pool_with_unique_name() + + ctx.manager.set_pool_property(pool, 'size', replica) + + wait_until_healthy(ctx, config) + + log.info('Executing with parameters: ') + log.info(' num_osd =' + str(num_osds)) + log.info(' size =' + str(size)) + log.info(' num_replicas =' + str(replica)) + log.info(' repetition =' + str(rep)) + + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + + proc = remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{}/archive/coverage'.format(teuthology.get_testdir(ctx)), + 'rados', + '--no-log-to-stderr', + '--name', role, + '-b', str(size), + '-p', pool, + 'bench', str(config.get('time', 120)), 'write', + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + stdin=run.PIPE, + stdout=BytesIO(), + wait=False + ) + + # parse output to get summary and format it as CSV + proc.wait() + out = proc.stdout.getvalue() + all_values = { + 'stdev_throughput': re.sub(r'Stddev Bandwidth: ', '', re.search( + r'Stddev Bandwidth:.*', out).group(0)), + 'stdev_latency': re.sub(r'Stddev Latency: ', '', re.search( + r'Stddev Latency:.*', out).group(0)), + 'avg_throughput': re.sub(r'Bandwidth \(MB/sec\): ', '', re.search( + r'Bandwidth \(MB/sec\):.*', out).group(0)), + 'avg_latency': re.sub(r'Average Latency: ', '', re.search( + r'Average Latency:.*', out).group(0)), + 'rep': str(rep), + 'num_osd': str(num_osds), + 'num_replica': str(replica) + } + values_to_write = [] + for column in config['columns']: + values_to_write.extend([all_values[column]]) + f.write(','.join(values_to_write) + '\n') + + ctx.manager.remove_pool(pool) + + +def wait_until_healthy(ctx, config): + first_mon = teuthology.get_first_mon(ctx, config) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + teuthology.wait_until_healthy(ctx, mon_remote) diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py new file mode 100644 index 000000000..780dae1e1 --- /dev/null +++ b/qa/tasks/radosgw_admin.py @@ -0,0 +1,1148 @@ +""" +Rgw admin testing against a running instance +""" +# The test cases in this file have been annotated for inventory. 
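# Illustrative sketch (not part of the patch): how run_radosbench() above turns
# the `rados bench` summary into one CSV row. The sample text is made up but
# uses the same field labels the regular expressions look for.
import re

sample = """Bandwidth (MB/sec): 95.37
Stddev Bandwidth: 3.21
Average Latency: 0.67
Stddev Latency: 0.05"""

def summary_value(label, text):
    # keep only the value that follows "<label>:" on its line
    return re.search(label + r':\s*(\S+)', text).group(1)

row = [summary_value(r'Bandwidth \(MB/sec\)', sample),    # avg_throughput
       summary_value(r'Stddev Bandwidth', sample),        # stdev_throughput
       summary_value(r'Average Latency', sample),         # avg_latency
       summary_value(r'Stddev Latency', sample)]          # stdev_latency
print(','.join(row))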
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# to run this standalone: +# python qa/tasks/radosgw_admin.py [--user=uid] --host=host --port=port +# + +import json +import logging +import time +import datetime +import sys + +from io import StringIO +from queue import Queue + +import boto.exception +import boto.s3.connection +import boto.s3.acl + +import httplib2 + +#import pdb + +import tasks.vstart_runner +from tasks.rgw import RGWEndpoint +from tasks.util.rgw import rgwadmin as tasks_util_rgw_rgwadmin +from tasks.util.rgw import get_user_summary, get_user_successful_ops + +log = logging.getLogger(__name__) + +def rgwadmin(*args, **kwargs): + ctx = args[0] + # Is this a local runner? + omit_sudo = hasattr(ctx.rgw, 'omit_sudo') and ctx.rgw.omit_sudo == True + omit_tdir = hasattr(ctx.rgw, 'omit_tdir') and ctx.rgw.omit_tdir == True + return tasks_util_rgw_rgwadmin(*args, **kwargs, omit_sudo=omit_sudo, omit_tdir=omit_tdir) + +def usage_acc_findentry2(entries, user, add=True): + for e in entries: + if e['user'] == user: + return e + if not add: + return None + e = {'user': user, 'buckets': []} + entries.append(e) + return e +def usage_acc_findsum2(summaries, user, add=True): + for e in summaries: + if e['user'] == user: + return e + if not add: + return None + e = {'user': user, 'categories': [], + 'total': {'bytes_received': 0, + 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 }} + summaries.append(e) + return e +def usage_acc_update2(x, out, b_in, err): + x['bytes_sent'] += b_in + x['bytes_received'] += out + x['ops'] += 1 + if not err: + x['successful_ops'] += 1 +def usage_acc_validate_fields(r, x, x2, what): + q=[] + for field in ['bytes_sent', 'bytes_received', 'ops', 'successful_ops']: + try: + if x2[field] < x[field]: + q.append("field %s: %d < %d" % (field, x2[field], x[field])) + except Exception as ex: + r.append( "missing/bad field " + field + " in " + what + " " + str(ex)) + return + if len(q) > 0: + r.append("incomplete counts in " + what + ": " + ", ".join(q)) +class usage_acc: + def __init__(self): + self.results = {'entries': [], 'summary': []} + def findentry(self, user): + return usage_acc_findentry2(self.results['entries'], user) + def findsum(self, user): + return usage_acc_findsum2(self.results['summary'], user) + def e2b(self, e, bucket, add=True): + for b in e['buckets']: + if b['bucket'] == bucket: + return b + if not add: + return None + b = {'bucket': bucket, 'categories': []} + e['buckets'].append(b) + return b + def c2x(self, c, cat, add=True): + for x in c: + if x['category'] == cat: + return x + if not add: + return None + x = {'bytes_received': 0, 'category': cat, + 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 } + c.append(x) + return x + def update(self, c, cat, user, out, b_in, err): + x = self.c2x(c, cat) + usage_acc_update2(x, out, b_in, err) + if not err and cat == 'create_bucket' and 'owner' not in x: + x['owner'] = user + def make_entry(self, cat, bucket, user, out, b_in, err): + if cat == 'create_bucket' and err: + return + e = self.findentry(user) + b = self.e2b(e, bucket) + self.update(b['categories'], cat, user, out, b_in, err) + s = self.findsum(user) + x = self.c2x(s['categories'], cat) + usage_acc_update2(x, out, b_in, err) + x = s['total'] + usage_acc_update2(x, out, b_in, err) + def generate_make_entry(self): + return lambda cat,bucket,user,out,b_in,err: self.make_entry(cat, bucket, user, out, b_in, err) + def get_usage(self): + return self.results + def compare_results(self, 
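# Illustrative sketch (not part of the patch): the shape of the usage that
# usage_acc builds above. Each logged request bumps the per-bucket counters for
# its category; 'bytes_received' tallies request bodies (what RGW received) and
# 'bytes_sent' tallies response bodies. The values below are made up.
def record(acc, user, bucket, category, request_bytes, response_bytes, err):
    entry = next((e for e in acc['entries'] if e['user'] == user), None)
    if entry is None:
        entry = {'user': user, 'buckets': []}
        acc['entries'].append(entry)
    buck = next((b for b in entry['buckets'] if b['bucket'] == bucket), None)
    if buck is None:
        buck = {'bucket': bucket, 'categories': []}
        entry['buckets'].append(buck)
    cat = next((c for c in buck['categories'] if c['category'] == category), None)
    if cat is None:
        cat = {'category': category, 'bytes_sent': 0, 'bytes_received': 0,
               'ops': 0, 'successful_ops': 0}
        buck['categories'].append(cat)
    cat['bytes_received'] += request_bytes
    cat['bytes_sent'] += response_bytes
    cat['ops'] += 1
    if not err:
        cat['successful_ops'] += 1

acc = {'entries': [], 'summary': []}
record(acc, 'foo', 'myfoo', 'put_obj', request_bytes=128, response_bytes=0, err=False)
print(acc['entries'][0]['buckets'][0]['categories'][0])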
results): + if 'entries' not in results or 'summary' not in results: + return ['Missing entries or summary'] + r = [] + for e in self.results['entries']: + try: + e2 = usage_acc_findentry2(results['entries'], e['user'], False) + except Exception as ex: + r.append("malformed entry looking for user " + + e['user'] + " " + str(ex)) + break + if e2 == None: + r.append("missing entry for user " + e['user']) + continue + for b in e['buckets']: + c = b['categories'] + if b['bucket'] == 'nosuchbucket': + print("got here") + try: + b2 = self.e2b(e2, b['bucket'], False) + if b2 != None: + c2 = b2['categories'] + except Exception as ex: + r.append("malformed entry looking for bucket " + + b['bucket'] + " in user " + e['user'] + " " + str(ex)) + break + if b2 == None: + r.append("can't find bucket " + b['bucket'] + + " in user " + e['user']) + continue + for x in c: + try: + x2 = self.c2x(c2, x['category'], False) + except Exception as ex: + r.append("malformed entry looking for " + + x['category'] + " in bucket " + b['bucket'] + + " user " + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "entry: category " + + x['category'] + " bucket " + b['bucket'] + + " in user " + e['user']) + for s in self.results['summary']: + c = s['categories'] + try: + s2 = usage_acc_findsum2(results['summary'], s['user'], False) + except Exception as ex: + r.append("malformed summary looking for user " + e['user'] + + " " + str(ex)) + break + if s2 == None: + r.append("missing summary for user " + e['user'] + " " + str(ex)) + continue + try: + c2 = s2['categories'] + except Exception as ex: + r.append("malformed summary missing categories for user " + + e['user'] + " " + str(ex)) + break + for x in c: + try: + x2 = self.c2x(c2, x['category'], False) + except Exception as ex: + r.append("malformed summary looking for " + + x['category'] + " user " + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "summary: category " + + x['category'] + " in user " + e['user']) + x = s['total'] + try: + x2 = s2['total'] + except Exception as ex: + r.append("malformed summary looking for totals for user " + + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "summary: totals for user" + e['user']) + return r + +def ignore_this_entry(cat, bucket, user, out, b_in, err): + pass +class requestlog_queue(): + def __init__(self, add): + self.q = Queue(1000) + self.adder = add + def handle_request_data(self, request, response, error=False): + now = datetime.datetime.now() + if error: + pass + elif response.status < 200 or response.status >= 400: + error = True + self.q.put({'t': now, 'o': request, 'i': response, 'e': error}) + def clear(self): + with self.q.mutex: + self.q.queue.clear() + def log_and_clear(self, cat, bucket, user, add_entry = None): + while not self.q.empty(): + j = self.q.get() + bytes_out = 0 + if 'Content-Length' in j['o'].headers: + bytes_out = int(j['o'].headers['Content-Length']) + bytes_in = 0 + msg = j['i'].msg + if 'content-length'in msg: + bytes_in = int(msg['content-length']) + log.info('RL: %s %s %s bytes_out=%d bytes_in=%d failed=%r' + % (cat, bucket, user, bytes_out, bytes_in, j['e'])) + if add_entry == None: + add_entry = self.adder + add_entry(cat, bucket, user, bytes_out, bytes_in, j['e']) + +def create_presigned_url(conn, method, bucket_name, key_name, expiration): + return conn.generate_url(expires_in=expiration, + method=method, + bucket=bucket_name, + key=key_name, + query_auth=True, + ) + +def send_raw_http_request(conn, method, 
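# Illustrative sketch (not part of the patch): the field check used by
# compare_results() above accepts the server-side usage as long as every
# counter is at least what the test accumulated locally (unrelated traffic can
# only inflate the numbers, never shrink them).
def counts_complete(expected, observed):
    fields = ['bytes_sent', 'bytes_received', 'ops', 'successful_ops']
    return all(observed.get(f, 0) >= expected.get(f, 0) for f in fields)

print(counts_complete({'ops': 3, 'successful_ops': 3},
                      {'ops': 5, 'successful_ops': 4}))    # True
print(counts_complete({'ops': 3, 'successful_ops': 3},
                      {'ops': 2, 'successful_ops': 2}))    # False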
bucket_name, key_name, follow_redirects = False): + url = create_presigned_url(conn, method, bucket_name, key_name, 3600) + print(url) + h = httplib2.Http() + h.follow_redirects = follow_redirects + return h.request(url, method) + + +def get_acl(key): + """ + Helper function to get the xml acl from a key, ensuring that the xml + version tag is removed from the acl response + """ + raw_acl = key.get_xml_acl().decode() + + def remove_version(string): + return string.split( + '<?xml version="1.0" encoding="UTF-8"?>' + )[-1] + + def remove_newlines(string): + return string.strip('\n') + + return remove_version( + remove_newlines(raw_acl) + ) + +def cleanup(ctx, client): + # remove objects and buckets + (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True) + try: + for bucket in out: + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'rm', '--bucket', bucket, '--purge-objects'], + check_status=True) + except: + pass + + # remove test user(s) + users = ['foo', 'fud', 'bar', 'bud'] + users.reverse() + for user in users: + try: + (err, out) = rgwadmin(ctx, client, [ + 'user', 'rm', '--uid', user], + check_status=True) + except: + pass + + # remove custom placement + try: + zonecmd = ['zone', 'placement', 'rm', '--rgw-zone', 'default', + '--placement-id', 'new-placement'] + (err, out) = rgwadmin(ctx, client, zonecmd, check_status=True) + except: + pass + +def task(ctx, config): + """ + Test radosgw-admin functionality against a running rgw instance. + """ + global log + + assert ctx.rgw.config, \ + "radosgw_admin task needs a config passed from the rgw task" + config = ctx.rgw.config + log.debug('config is: %r', config) + + clients_from_config = config.keys() + + # choose first client as default + client = next(iter(clients_from_config)) + + # once the client is chosen, pull the host name and assigned port out of + # the role_endpoints that were assigned by the rgw task + endpoint = ctx.rgw.role_endpoints[client] + + cleanup(ctx, client) + + ## + user1='foo' + user2='fud' + user3='bar' + user4='bud' + subuser1='foo:foo1' + subuser2='foo:foo2' + display_name1='Foo' + display_name2='Fud' + display_name3='Bar' + email='foo@foo.com' + access_key='9te6NH5mcdcq0Tc5i8i1' + secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' + access_key2='p5YnriCv1nAtykxBrupQ' + secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' + access_key3='NX5QOQKC6BH2IDN8HC7A' + secret_key3='LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn' + swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' + swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' + + bucket_name='myfoo' + bucket_name2='mybar' + + # connect to rgw + connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection.auth_region_name='us-east-1' + + connection2 = boto.s3.connection.S3Connection( + aws_access_key_id=access_key2, + aws_secret_access_key=secret_key2, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection2.auth_region_name='us-east-1' + + connection3 = boto.s3.connection.S3Connection( + aws_access_key_id=access_key3, + aws_secret_access_key=secret_key3, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection3.auth_region_name='us-east-1' + + acc = usage_acc() + rl = 
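# Illustrative sketch (not part of the patch): what send_raw_http_request()
# above boils down to with a plain boto2 connection. It needs a reachable RGW
# endpoint and valid credentials, so treat it as a usage sketch rather than a
# ready-to-run test; the host/port below are hypothetical.
import boto.s3.connection
import httplib2

conn = boto.s3.connection.S3Connection(
    aws_access_key_id='9te6NH5mcdcq0Tc5i8i1',
    aws_secret_access_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu',
    host='localhost', port=8000, is_secure=False,
    calling_format=boto.s3.connection.OrdinaryCallingFormat())

url = conn.generate_url(expires_in=3600, method='GET',
                        bucket='myfoo', key='four', query_auth=True)
http = httplib2.Http()
http.follow_redirects = False
response, body = http.request(url, 'GET')
print(response.status)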
requestlog_queue(acc.generate_make_entry()) + connection.set_request_hook(rl) + connection2.set_request_hook(rl) + connection3.set_request_hook(rl) + + # legend (test cases can be easily grep-ed out) + # TESTCASE 'testname','object','method','operation','assertion' + + # TESTCASE 'usage-show0' 'usage' 'show' 'all usage' 'succeeds' + (err, summary0) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) + + # TESTCASE 'info-nosuch','user','info','non-existent user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'create-ok','user','create','w/all valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user1, + '--display-name', display_name1, + '--email', email, + '--access-key', access_key, + '--secret', secret_key, + '--max-buckets', '4' + ], + check_status=True) + + # TESTCASE 'duplicate email','user','create','existing user email','fails' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--email', email, + ]) + assert err + + # TESTCASE 'info-existing','user','info','existing user','returns correct info' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + + # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['suspended'] + + # TESTCASE 're-enable','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], check_status=True) + + # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert not out['suspended'] + + # TESTCASE 'add-keys','key','create','w/valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'create', '--uid', user1, + '--access-key', access_key2, '--secret', secret_key2, + ], check_status=True) + + # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], + check_status=True) + assert len(out['keys']) == 2 + assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 + assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 + + # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--uid', user1, + '--access-key', access_key2, + ], check_status=True) + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + subuser_access = 'full' + subuser_perm = 'full-control' + + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser1, + '--access', subuser_access + ], check_status=True) + + # TESTCASE 
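# Illustrative sketch (not part of the patch): outside teuthology the same
# checks can be driven with the radosgw-admin CLI directly; the rgwadmin()
# helper above does essentially this plus the coverage/ulimit wrappers.
# Assumes a running cluster with radosgw-admin in PATH.
import json
import subprocess

def radosgw_admin(*args):
    out = subprocess.check_output(('radosgw-admin',) + args)
    return json.loads(out) if out.strip() else None

info = radosgw_admin('user', 'info', '--uid', 'foo')   # after 'user create'
assert info['user_id'] == 'foo'
assert not info['suspended']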
'add-swift-key','key','create','swift key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'modify', '--subuser', subuser1, + '--secret', swift_secret1, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + + assert out['subusers'][0]['permissions'] == subuser_perm + + # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert len(out['swift_keys']) == 1 + assert out['swift_keys'][0]['user'] == subuser1 + assert out['swift_keys'][0]['secret_key'] == swift_secret1 + + # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser2, + '--secret', swift_secret2, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert len(out['swift_keys']) == 2 + assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 + assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 + + # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--subuser', subuser1, + '--key-type', 'swift', + ], check_status=True) + assert len(out['swift_keys']) == 1 + + # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser1, + ], check_status=True) + assert len(out['subusers']) == 1 + + # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser2, + '--key-type', 'swift', '--purge-keys', + ], check_status=True) + assert len(out['swift_keys']) == 0 + assert len(out['subusers']) == 0 + + # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], + check_status=True) + assert len(out) == 0 + + # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 0 + + # create a first bucket + bucket = connection.create_bucket(bucket_name) + + rl.log_and_clear("create_bucket", bucket_name, user1) + + # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0] == bucket_name + + bucket_list = connection.get_all_buckets() + assert len(bucket_list) == 1 + assert bucket_list[0].name == bucket_name + + rl.log_and_clear("list_buckets", '', user1) + + # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True) + assert len(out) >= 1 + assert bucket_name in out; + + # TESTCASE 'max-bucket-limit,'bucket','create','4 buckets','5th bucket fails due to max buckets == 4' + bucket2 = 
connection.create_bucket(bucket_name + '2') + rl.log_and_clear("create_bucket", bucket_name + '2', user1) + bucket3 = connection.create_bucket(bucket_name + '3') + rl.log_and_clear("create_bucket", bucket_name + '3', user1) + bucket4 = connection.create_bucket(bucket_name + '4') + rl.log_and_clear("create_bucket", bucket_name + '4', user1) + # the 5th should fail. + failed = False + try: + connection.create_bucket(bucket_name + '5') + except Exception: + failed = True + assert failed + rl.log_and_clear("create_bucket", bucket_name + '5', user1) + + # delete the buckets + bucket2.delete() + rl.log_and_clear("delete_bucket", bucket_name + '2', user1) + bucket3.delete() + rl.log_and_clear("delete_bucket", bucket_name + '3', user1) + bucket4.delete() + rl.log_and_clear("delete_bucket", bucket_name + '4', user1) + + # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['owner'] == user1 + bucket_id = out['id'] + + # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? + + # use some space + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('one') + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 1 + assert out['usage']['rgw.main']['size_kb'] > 0 + + #validate we have a positive user stats now + (err, out) = rgwadmin(ctx, client, + ['user', 'stats','--uid', user1, '--sync-stats'], + check_status=True) + assert out['stats']['size'] > 0 + + # reclaim it + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' + (err, out) = rgwadmin(ctx, client, + ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name], + check_status=True) + + # create a second user to link the bucket to + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '1', + ], + check_status=True) + + # try creating an object with the first user before the bucket is relinked + denied = False + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('two') + except boto.exception.S3ResponseError: + denied = True + + assert not denied + rl.log_and_clear("put_obj", bucket_name, user1) + + # delete the object + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n=bucket_name)], + check_status=True) + + bucket_data = out['data'] + assert bucket_data['bucket']['name'] == bucket_name + + bucket_id = bucket_data['bucket']['bucket_id'] + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + # try to remove user, should fail (has a linked 
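# Illustrative sketch (not part of the patch): the max-buckets check above in
# plain boto2 terms; with --max-buckets 4 the fifth create_bucket() is expected
# to fail. Assumes a live 'connection' like the one opened earlier in the task.
import boto.exception

def bucket_quota_enforced(connection, names):
    # try to create each bucket in turn; report whether the quota kicked in
    for name in names:
        try:
            connection.create_bucket(name)
        except boto.exception.S3ResponseError:
            return True
    return False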
bucket) + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2]) + assert err + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked' + (err, out) = rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name], + check_status=True) + + # relink the bucket to the first user and delete the second user + (err, out) = rgwadmin(ctx, client, + ['bucket', 'link', '--uid', user1, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2], + check_status=True) + + #TESTCASE 'bucket link', 'bucket', 'tenanted user', 'succeeds' + tenant_name = "testx" + # create a tenanted user to link the bucket to + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--tenant', tenant_name, + '--uid', 'tenanteduser', + '--display-name', 'tenanted-user', + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '1', + ], + check_status=True) + + # link the bucket to a tenanted user + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--bucket', '/' + bucket_name, '--tenant', tenant_name, '--uid', 'tenanteduser'], + check_status=True) + + # check if the bucket name has tenant/ prefix + (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n= tenant_name + '/' + bucket_name)], + check_status=True) + + bucket_data = out['data'] + assert bucket_data['bucket']['name'] == bucket_name + assert bucket_data['bucket']['tenant'] == tenant_name + + # relink the bucket to the first user and delete the tenanted user + (err, out) = rgwadmin(ctx, client, + ['bucket', 'link', '--bucket', tenant_name + '/' + bucket_name, '--uid', user1], + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--tenant', tenant_name, '--uid', 'tenanteduser'], + check_status=True) + + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' + + # upload an object + object_name = 'four' + key = boto.s3.key.Key(bucket, object_name) + key.set_contents_from_string(object_name) + rl.log_and_clear("put_obj", bucket_name, user1) + + # fetch it too (for usage stats presently) + s = key.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name, user1) + assert s == object_name + # list bucket too (for usage stats presently) + keys = list(bucket.list()) + rl.log_and_clear("list_bucket", bucket_name, user1) + assert len(keys) == 1 + assert keys[0].name == object_name + + # now delete it + (err, out) = rgwadmin(ctx, client, + ['object', 'rm', '--bucket', bucket_name, '--object', object_name], + check_status=True) + + # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], + check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 0 + + # list log objects + # TESTCASE 'log-list','log','list','after activity','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True) + assert len(out) > 0 + + for obj in out: + # TESTCASE 'log-show','log','show','after activity','returns expected info' + if obj[:4] == 'meta' or obj[:4] == 'data' or obj[:18] == 'obj_delete_at_hint': + continue + + (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj], + check_status=True) + assert len(rgwlog) > 0 + + # skip any entry for which there is no bucket name--e.g., 
list_buckets, + # since that is valid but cannot pass the following checks + entry_bucket_name = rgwlog['bucket'] + if entry_bucket_name.strip() != "": + # exempt bucket_name2 from checking as it was only used for multi-region tests + assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0 + assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id + assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2 + for entry in rgwlog['log_entries']: + log.debug('checking log entry: ', entry) + assert entry['bucket'] == rgwlog['bucket'] + possible_buckets = [bucket_name + '5', bucket_name2] + user = entry['user'] + assert user == user1 or user.endswith('system-user') or \ + rgwlog['bucket'] in possible_buckets + + # TESTCASE 'log-rm','log','rm','delete log objects','succeeds' + (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj], + check_status=True) + + # TODO: show log by bucket+date + + # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' + denied = False + try: + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('five') + except boto.exception.S3ResponseError as e: + denied = True + assert e.status == 403 + + assert denied + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], + check_status=True) + + # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('six') + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection' + + # create an object large enough to be split into multiple parts + test_string = 'foo'*10000000 + + big_key = boto.s3.key.Key(bucket) + big_key.set_contents_from_string(test_string) + rl.log_and_clear("put_obj", bucket_name, user1) + + # now delete the head + big_key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # wait a bit to give the garbage collector time to cycle + time.sleep(15) + + (err, out) = rgwadmin(ctx, client, ['gc', 'list', '--include-all']) + assert len(out) > 0 + + # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage' + (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True) + + #confirm + (err, out) = rgwadmin(ctx, client, ['gc', 'list', '--include-all']) + + # don't assume rgw_gc_obj_min_wait has been overridden + omit_tdir = hasattr(ctx.rgw, 'omit_tdir') and ctx.rgw.omit_tdir == True + if omit_tdir==False: + assert len(out) == 0 + + # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # delete should fail because ``key`` still exists + try: + bucket.delete() + except boto.exception.S3ResponseError as e: + assert e.status == 409 + rl.log_and_clear("delete_bucket", bucket_name, user1) + + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + bucket.delete() + rl.log_and_clear("delete_bucket", bucket_name, user1) + + # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' + bucket = 
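# Illustrative sketch (not part of the patch): the suspend/enable round trip
# above, reduced to the check that a write from a suspended user is rejected
# with HTTP 403. Assumes a live boto2 'bucket' handle.
import boto.exception
import boto.s3.key

def put_denied(bucket, name, data):
    key = boto.s3.key.Key(bucket, name)
    try:
        key.set_contents_from_string(data)
    except boto.exception.S3ResponseError as e:
        return e.status == 403
    return False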
connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + + # create an object + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('seven') + rl.log_and_clear("put_obj", bucket_name, user1) + + # should be private already but guarantee it + key.set_acl('private') + rl.log_and_clear("put_acls", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key.decode()], + check_status=True, format='xml') + + acl = get_acl(key) + rl.log_and_clear("get_acls", bucket_name, user1) + + assert acl == out.strip('\n') + + # add another grantee by making the object public read + key.set_acl('public-read') + rl.log_and_clear("put_acls", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key.decode()], + check_status=True, format='xml') + + acl = get_acl(key) + rl.log_and_clear("get_acls", bucket_name, user1) + + assert acl == out.strip('\n') + + # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key_name = ['eight', 'nine', 'ten', 'eleven'] + for i in range(4): + key = boto.s3.key.Key(bucket) + key.set_contents_from_string(key_name[i]) + rl.log_and_clear("put_obj", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'], + check_status=True) + + # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' + caps='user=read' + (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps]) + + assert out['caps'][0]['perm'] == 'read' + + # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps]) + + assert not out['caps'] + + # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key = boto.s3.key.Key(bucket) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('twelve') + rl.log_and_clear("put_obj", bucket_name, user1) + + time.sleep(35) + + # need to wait for all usage data to get flushed, should take up to 30 seconds + timestamp = time.time() + while time.time() - timestamp <= (2 * 60): # wait up to 20 minutes + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj']) # one of the operations we did is delete_obj, should be present. 
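# Illustrative sketch (not part of the patch): the policy test above compares
# `radosgw-admin policy` output with the S3 ACL only after dropping the XML
# declaration and surrounding newlines, as get_acl() does:
def normalize_acl(raw_acl):
    return raw_acl.strip('\n').split('<?xml version="1.0" encoding="UTF-8"?>')[-1]

sample = '<?xml version="1.0" encoding="UTF-8"?><AccessControlPolicy/>\n'
print(normalize_acl(sample))    # -> <AccessControlPolicy/>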
+ if get_user_successful_ops(out, user1) > 0: + break + time.sleep(1) + + assert time.time() - timestamp <= (20 * 60) + + # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + + r = acc.compare_results(out) + if len(r) != 0: + sys.stderr.write(("\n".join(r))+"\n") + assert(len(r) == 0) + + user_summary = get_user_summary(out, user1) + + total = user_summary['total'] + assert total['successful_ops'] > 0 + + # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + for entry in user_summary['categories']: + assert entry['successful_ops'] > 0 + assert user_summary['user'] == user1 + + # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' + test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] + for cat in test_categories: + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat], + check_status=True) + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + assert user_summary['user'] == user1 + assert len(user_summary['categories']) == 1 + entry = user_summary['categories'][0] + assert entry['category'] == cat + assert entry['successful_ops'] > 0 + + # TESTCASE 'user-rename', 'user', 'rename', 'existing user', 'new user', 'succeeds' + # create a new user user3 + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user3, + '--display-name', display_name3, + '--access-key', access_key3, + '--secret', secret_key3, + '--max-buckets', '4' + ], + check_status=True) + + # create a bucket + bucket = connection3.create_bucket(bucket_name + '6') + + rl.log_and_clear("create_bucket", bucket_name + '6', user3) + + # create object + object_name1 = 'thirteen' + key1 = boto.s3.key.Key(bucket, object_name1) + key1.set_contents_from_string(object_name1) + rl.log_and_clear("put_obj", bucket_name + '6', user3) + + # rename user3 + (err, out) = rgwadmin(ctx, client, ['user', 'rename', '--uid', user3, '--new-uid', user4], check_status=True) + assert out['user_id'] == user4 + assert out['keys'][0]['access_key'] == access_key3 + assert out['keys'][0]['secret_key'] == secret_key3 + + time.sleep(5) + + # get bucket and object to test if user keys are preserved + bucket = connection3.get_bucket(bucket_name + '6') + s = key1.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '6', user4) + assert s == object_name1 + + # TESTCASE 'user-rename', 'user', 'rename', 'existing user', 'another existing user', 'fails' + # create a new user user2 + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '4' + ], + check_status=True) + + # create a bucket + bucket = connection2.create_bucket(bucket_name + '7') + + rl.log_and_clear("create_bucket", bucket_name + '7', user2) + + # create object + object_name2 = 'fourteen' + key2 = boto.s3.key.Key(bucket, object_name2) + key2.set_contents_from_string(object_name2) + rl.log_and_clear("put_obj", bucket_name + '7', user2) + + (err, out) = rgwadmin(ctx, client, ['user', 'rename', '--uid', user4, '--new-uid', user2]) + assert err + + # test if user 2 and user4 can still 
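# Illustrative sketch (not part of the patch): the wait above is a generic
# poll-until-true with a deadline; usage data is only flushed periodically, so
# the test polls for the delete_obj entry instead of sleeping a fixed time.
import time

def wait_for(predicate, timeout=120, interval=1):
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False

# e.g. wait_for(lambda: get_user_successful_ops(show_usage(), 'foo') > 0)
# where show_usage() is a hypothetical wrapper around 'usage show'.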
access their bucket and objects after rename fails + bucket = connection3.get_bucket(bucket_name + '6') + s = key1.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '6', user4) + assert s == object_name1 + + bucket = connection2.get_bucket(bucket_name + '7') + s = key2.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '7', user2) + assert s == object_name2 + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user4, '--purge-data' ], + check_status=True) + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user2, '--purge-data' ], + check_status=True) + + time.sleep(5) + + # should be all through with connection. (anything using connection + # should be BEFORE the usage stuff above.) + rl.log_and_clear("(before-close)", '-', '-', ignore_this_entry) + connection.close() + connection = None + + # the usage flush interval is 30 seconds, wait that much an then some + # to make sure everything has been flushed + time.sleep(35) + + # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' + (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1], + check_status=True) + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) == 0 + assert len(out['summary']) == 0 + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user1, '--purge-data' ], + check_status=True) + + # TESTCASE 'rm-user3','user','rm','deleted user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule' + (err, out) = rgwadmin(ctx, client, ['zone', 'get','--rgw-zone','default']) + orig_placement_pools = len(out['placement_pools']) + + # removed this test, it is not correct to assume that zone has default placement, it really + # depends on how we set it up before + # + # assert len(out) > 0 + # assert len(out['placement_pools']) == 1 + + # default_rule = out['placement_pools'][0] + # assert default_rule['key'] == 'default-placement' + + rule={'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}} + + out['placement_pools'].append(rule) + + (err, out) = rgwadmin(ctx, client, ['zone', 'set'], + stdin=StringIO(json.dumps(out)), + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['zone', 'get']) + assert len(out) > 0 + assert len(out['placement_pools']) == orig_placement_pools + 1 + + zonecmd = ['zone', 'placement', 'rm', + '--rgw-zone', 'default', + '--placement-id', 'new-placement'] + + (err, out) = rgwadmin(ctx, client, zonecmd, check_status=True) + + # TESTCASE 'zonegroup-info', 'zonegroup', 'get', 'get zonegroup info', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['zonegroup', 'get'], check_status=True) + +from teuthology.config import config +from teuthology.orchestra import cluster + +import argparse; + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--uid') + parser.add_argument('--host', required=True) + parser.add_argument('--port', type=int) + + args = parser.parse_args() + host = args.host + if args.port: + port = args.port + else: + port = 80 + + client0 = tasks.vstart_runner.LocalRemote() + ctx = config + ctx.cluster=cluster.Cluster(remotes=[(client0, + [ 'ceph.client.rgw.%s' % (port), ]),]) + ctx.rgw = argparse.Namespace() + endpoints = {} + endpoints['ceph.client.rgw.%s' % port] = RGWEndpoint( + 
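# Illustrative sketch (not part of the patch): the placement test above edits
# the zone JSON in memory and feeds it back through `zone set` on stdin. The
# zone document here is a trimmed-down stand-in.
import json

zone = {'placement_pools': [
    {'key': 'default-placement',
     'val': {'data_pool': '.rgw.buckets',
             'index_pool': '.rgw.buckets.index'}}]}

zone['placement_pools'].append(
    {'key': 'new-placement',
     'val': {'data_pool': '.rgw.buckets.2',
             'index_pool': '.rgw.buckets.index.2'}})

print(json.dumps(zone, indent=2))   # this is what gets piped to 'zone set'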
hostname=host, + port=port) + ctx.rgw.role_endpoints = endpoints + ctx.rgw.realm = None + ctx.rgw.regions = {'region0': { 'api name': 'api1', + 'is master': True, 'master zone': 'r0z0', + 'zones': ['r0z0', 'r0z1'] }} + ctx.rgw.omit_sudo = True + ctx.rgw.omit_tdir = True + ctx.rgw.config = {'ceph.client.rgw.%s' % port: {'system user': {'name': '%s-system-user' % port}}} + task(config, None) + exit() + +if __name__ == '__main__': + main() diff --git a/qa/tasks/radosgw_admin_rest.py b/qa/tasks/radosgw_admin_rest.py new file mode 100644 index 000000000..3de4d6bc9 --- /dev/null +++ b/qa/tasks/radosgw_admin_rest.py @@ -0,0 +1,815 @@ +""" +Run a series of rgw admin commands through the rest interface. + +The test cases in this file have been annotated for inventory. +To extract the inventory (in csv format) use the command: + + grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' + +""" +import logging + + +import boto.exception +import boto.s3.connection +import boto.s3.acl + +import requests +import time + +from boto.connection import AWSAuthConnection +from teuthology import misc as teuthology +from tasks.util.rgw import get_user_summary, get_user_successful_ops, rgwadmin + +log = logging.getLogger(__name__) + +def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False): + """ + perform a rest command + """ + log.info('radosgw-admin-rest: %s %s' % (cmd, params)) + put_cmds = ['create', 'link', 'add'] + post_cmds = ['unlink', 'modify'] + delete_cmds = ['trim', 'rm', 'process'] + get_cmds = ['check', 'info', 'show', 'list', ''] + + bucket_sub_resources = ['object', 'policy', 'index'] + user_sub_resources = ['subuser', 'key', 'caps'] + zone_sub_resources = ['pool', 'log', 'garbage'] + + def get_cmd_method_and_handler(cmd): + """ + Get the rest command and handler from information in cmd and + from the imported requests object. + """ + if cmd[1] in put_cmds: + return 'PUT', requests.put + elif cmd[1] in delete_cmds: + return 'DELETE', requests.delete + elif cmd[1] in post_cmds: + return 'POST', requests.post + elif cmd[1] in get_cmds: + return 'GET', requests.get + + def get_resource(cmd): + """ + Get the name of the resource from information in cmd. + """ + if cmd[0] == 'bucket' or cmd[0] in bucket_sub_resources: + if cmd[0] == 'bucket': + return 'bucket', '' + else: + return 'bucket', cmd[0] + elif cmd[0] == 'user' or cmd[0] in user_sub_resources: + if cmd[0] == 'user': + return 'user', '' + else: + return 'user', cmd[0] + elif cmd[0] == 'usage': + return 'usage', '' + elif cmd[0] == 'info': + return 'info', '' + elif cmd[0] == 'ratelimit': + return 'ratelimit', '' + elif cmd[0] == 'zone' or cmd[0] in zone_sub_resources: + if cmd[0] == 'zone': + return 'zone', '' + else: + return 'zone', cmd[0] + + def build_admin_request(conn, method, resource = '', headers=None, data='', + query_args=None, params=None): + """ + Build an administative request adapted from the build_request() + method of boto.connection + """ + + path = conn.calling_format.build_path_base('admin', resource) + auth_path = conn.calling_format.build_auth_path('admin', resource) + host = conn.calling_format.build_host(conn.server_name(), 'admin') + if query_args: + path += '?' + query_args + boto.log.debug('path=%s' % path) + auth_path += '?' 
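# Illustrative sketch (not part of the patch): rgwadmin_rest() above picks the
# HTTP verb from the admin sub-command; the same mapping in isolation:
PUT_CMDS = {'create', 'link', 'add'}
POST_CMDS = {'unlink', 'modify'}
DELETE_CMDS = {'trim', 'rm', 'process'}
GET_CMDS = {'check', 'info', 'show', 'list', ''}

def http_method(cmd):
    action = cmd[1]
    if action in PUT_CMDS:
        return 'PUT'
    if action in POST_CMDS:
        return 'POST'
    if action in DELETE_CMDS:
        return 'DELETE'
    if action in GET_CMDS:
        return 'GET'
    raise ValueError('unknown admin action: %r' % action)

print(http_method(['user', 'create']))   # PUT
print(http_method(['bucket', 'info']))   # GET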
+ query_args + boto.log.debug('auth_path=%s' % auth_path) + return AWSAuthConnection.build_base_http_request(conn, method, path, + auth_path, params, headers, data, host) + + method, handler = get_cmd_method_and_handler(cmd) + resource, query_args = get_resource(cmd) + request = build_admin_request(connection, method, resource, + query_args=query_args, headers=headers) + + url = '{protocol}://{host}{path}'.format(protocol=request.protocol, + host=request.host, path=request.path) + + request.authorize(connection=connection) + result = handler(url, params=params, headers=request.headers) + + if raw: + log.info(' text result: %s' % result.text) + return result.status_code, result.text + elif len(result.content) == 0: + # many admin requests return no body, so json() throws a JSONDecodeError + log.info(' empty result') + return result.status_code, None + else: + log.info(' json result: %s' % result.json()) + return result.status_code, result.json() + + +def task(ctx, config): + """ + Test radosgw-admin functionality through the RESTful interface + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + # just use the first client... + client = next(iter(clients)) + + ## + admin_user = 'ada' + admin_display_name = 'Ms. Admin User' + admin_access_key = 'MH1WC2XQ1S8UISFDZC8W' + admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG' + admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write; info=read;ratelimit=read, write' + + user1 = 'foo' + user2 = 'fud' + ratelimit_user = 'ratelimit_user' + subuser1 = 'foo:foo1' + subuser2 = 'foo:foo2' + display_name1 = 'Foo' + display_name2 = 'Fud' + email = 'foo@foo.com' + access_key = '9te6NH5mcdcq0Tc5i8i1' + secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' + access_key2 = 'p5YnriCv1nAtykxBrupQ' + secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' + swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' + swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' + + bucket_name = 'myfoo' + + # legend (test cases can be easily grep-ed out) + # TESTCASE 'testname','object','method','operation','assertion' + # TESTCASE 'create-admin-user','user','create','administrative user','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', admin_user, + '--display-name', admin_display_name, + '--access-key', admin_access_key, + '--secret', admin_secret_key, + '--max-buckets', '0', + '--caps', admin_caps + ]) + logging.error(out) + logging.error(err) + assert not err + + assert hasattr(ctx, 'rgw'), 'radosgw-admin-rest must run after the rgw task' + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'no rgw endpoint for {}'.format(client) + + admin_conn = boto.s3.connection.S3Connection( + aws_access_key_id=admin_access_key, + aws_secret_access_key=admin_secret_key, + is_secure=True if endpoint.cert else False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # TESTCASE 'info-nosuch','user','info','non-existent user','fails' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1}) + assert ret == 404 + + # TESTCASE 
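# Illustrative sketch (not part of the patch): the admin requests built above
# target /admin/<resource>, with any sub-resource passed as a bare query
# string (e.g. /admin/user and /admin/user?subuser). Hypothetical helper:
def admin_path(resource, query_args=''):
    path = '/admin/%s' % resource
    if query_args:
        path += '?' + query_args
    return path

print(admin_path('user'))              # /admin/user
print(admin_path('user', 'subuser'))   # /admin/user?subuser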
'create-ok','user','create','w/all valid info','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : user1, + 'display-name' : display_name1, + 'email' : email, + 'access-key' : access_key, + 'secret-key' : secret_key, + 'max-buckets' : '4' + }) + + assert ret == 200 + + # TESTCASE 'list-no-user','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 0}) + assert ret == 200 + assert out['count'] == 0 + assert out['truncated'] == True + assert len(out['keys']) == 0 + assert len(out['marker']) > 0 + + # TESTCASE 'list-user-without-marker','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1}) + assert ret == 200 + assert out['count'] == 1 + assert out['truncated'] == True + assert len(out['keys']) == 1 + assert len(out['marker']) > 0 + marker = out['marker'] + + # TESTCASE 'list-user-with-marker','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1, 'marker': marker}) + assert ret == 200 + assert out['count'] == 1 + assert out['truncated'] == False + assert len(out['keys']) == 1 + + # TESTCASE 'info-existing','user','info','existing user','returns correct info' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + assert out['tenant'] == '' + assert out['max_buckets'] == 4 + assert out['caps'] == [] + assert out['op_mask'] == 'read, write, delete' + assert out['default_placement'] == '' + assert out['default_storage_class'] == '' + assert out['placement_tags'] == [] + assert not out['bucket_quota']['enabled'] + assert not out['bucket_quota']['check_on_raw'] + assert out['bucket_quota']['max_size'] == -1 + assert out['bucket_quota']['max_size_kb'] == 0 + assert out['bucket_quota']['max_objects'] == -1 + assert not out['user_quota']['enabled'] + assert not out['user_quota']['check_on_raw'] + assert out['user_quota']['max_size'] == -1 + assert out['user_quota']['max_size_kb'] == 0 + assert out['user_quota']['max_objects'] == -1 + assert out['temp_url_keys'] == [] + assert out['type'] == 'rgw' + assert out['mfa_ids'] == [] + # TESTCASE 'info-existing','user','info','existing user query with wrong uid but correct access key','returns correct info' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'access-key' : access_key, 'uid': 'uid_not_exist'}) + + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + assert out['tenant'] == '' + assert out['max_buckets'] == 4 + assert out['caps'] == [] + assert out['op_mask'] == "read, write, delete" + assert out['default_placement'] == '' + assert out['default_storage_class'] == '' + assert out['placement_tags'] == [] + assert not out['bucket_quota']['enabled'] + assert not out['bucket_quota']['check_on_raw'] + assert out ['bucket_quota']['max_size'] == -1 + assert out ['bucket_quota']['max_size_kb'] == 0 + assert out ['bucket_quota']['max_objects'] 
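# Illustrative sketch (not part of the patch): the 'user list' checks above
# page through results with max-entries/marker until 'truncated' is False;
# 'list_users_page' below is a hypothetical callable standing in for the
# signed REST call.
def all_users(list_users_page, page_size=1):
    users, marker = [], None
    while True:
        params = {'list': '', 'max-entries': page_size}
        if marker:
            params['marker'] = marker
        out = list_users_page(params)
        users.extend(out['keys'])
        if not out['truncated']:
            return users
        marker = out['marker']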
== -1 + assert not out['user_quota']['enabled'] + assert not out['user_quota']['check_on_raw'] + assert out['user_quota']['max_size'] == -1 + assert out['user_quota']['max_size_kb'] == 0 + assert out['user_quota']['max_objects'] == -1 + assert out['temp_url_keys'] == [] + assert out['type'] == 'rgw' + assert out['mfa_ids'] == [] + + # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) + assert ret == 200 + + # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert out['suspended'] + assert out['email'] == email + + # TESTCASE 're-enable','user','enable','suspended user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) + assert not err + + # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert not out['suspended'] + + # TESTCASE 'add-keys','key','create','w/valid info','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'create'], + {'uid' : user1, + 'access-key' : access_key2, + 'secret-key' : secret_key2 + }) + + + assert ret == 200 + + # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['keys']) == 2 + assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 + assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 + + # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'rm'], + {'uid' : user1, + 'access-key' : access_key2 + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'create'], + {'subuser' : subuser1, + 'secret-key' : swift_secret1, + 'key-type' : 'swift' + }) + + assert ret == 200 + + # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['swift_keys']) == 1 + assert out['swift_keys'][0]['user'] == subuser1 + assert out['swift_keys'][0]['secret_key'] == swift_secret1 + + # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'create'], + {'subuser' : subuser2, + 'secret-key' : swift_secret2, + 'key-type' : 'swift' + }) + + assert ret == 200 + + # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['swift_keys']) == 2 + assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 + assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 + + # TESTCASE 
'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'rm'], + {'subuser' : subuser1, + 'key-type' :'swift' + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['swift_keys']) == 1 + + # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'rm'], + {'subuser' : subuser1 + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['subusers']) == 1 + + # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'rm'], + {'subuser' : subuser2, + 'key-type' : 'swift', + '{purge-keys' :True + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['swift_keys']) == 0 + assert len(out['subusers']) == 0 + + # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out) == 0 + + # connect to rgw + connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=True if endpoint.cert else False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) + assert ret == 200 + assert len(out) == 0 + + # create a first bucket + bucket = connection.create_bucket(bucket_name) + + # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out) == 1 + assert out[0] == bucket_name + + # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + + assert ret == 200 + assert out['owner'] == user1 + assert out['tenant'] == '' + bucket_id = out['id'] + + # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) + assert ret == 200 + assert len(out) == 1 + assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? 
+ + # use some space + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('one') + + # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + assert ret == 200 + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 1 + assert out['usage']['rgw.main']['size_kb'] > 0 + + # TESTCASE 'bucket-stats6', 'bucket', 'stats', 'non-existent bucket', 'fails, 'bucket not found error' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : 'doesnotexist'}) + assert ret == 404 + assert out['Code'] == 'NoSuchBucket' + + # reclaim it + key.delete() + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid' : user1, 'bucket' : bucket_name}) + + assert ret == 200 + + # create a second user to link the bucket to + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : user2, + 'display-name' : display_name2, + 'access-key' : access_key2, + 'secret-key' : secret_key2, + 'max-buckets' : '1', + }) + + assert ret == 200 + + # try creating an object with the first user before the bucket is relinked + denied = False + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('two') + except boto.exception.S3ResponseError: + denied = True + + assert not denied + + # delete the object + key.delete() + + # link the bucket to another user + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'link'], + {'uid' : user2, + 'bucket' : bucket_name, + 'bucket-id' : bucket_id, + }) + + assert ret == 200 + + # try creating an object with the first user which should cause an error + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('three') + except boto.exception.S3ResponseError: + denied = True + + assert denied + + # relink the bucket to the first user and delete the second user + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'link'], + {'uid' : user1, + 'bucket' : bucket_name, + 'bucket-id' : bucket_id, + }) + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user2}) + assert ret == 200 + + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' + + # upload an object + object_name = 'four' + key = boto.s3.key.Key(bucket, object_name) + key.set_contents_from_string(object_name) + + # now delete it + (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket' : bucket_name, 'object' : object_name}) + assert ret == 200 + + # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + assert ret == 200 + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 0 + + # create a bucket for deletion stats + useless_bucket = connection.create_bucket('useless-bucket') + useless_key = useless_bucket.new_key('useless_key') + useless_key.set_contents_from_string('useless string') + + # delete it + useless_key.delete() + useless_bucket.delete() + + # wait for the statistics to flush + time.sleep(60) + + # need to wait for all usage data to get flushed, should take up to 30 seconds + timestamp = time.time() + while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 
'show'], {'categories' : 'delete_obj'}) # last operation we did is delete obj, wait for it to flush + + if get_user_successful_ops(out, user1) > 0: + break + time.sleep(1) + + assert time.time() - timestamp <= (20 * 60) + + # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show']) + assert ret == 200 + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = get_user_summary(out, user1) + total = user_summary['total'] + assert total['successful_ops'] > 0 + + # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) + assert ret == 200 + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + for entry in user_summary['categories']: + assert entry['successful_ops'] > 0 + assert user_summary['user'] == user1 + + # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' + test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] + for cat in test_categories: + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1, 'categories' : cat}) + assert ret == 200 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + assert user_summary['user'] == user1 + assert len(user_summary['categories']) == 1 + entry = user_summary['categories'][0] + assert entry['category'] == cat + assert entry['successful_ops'] > 0 + + # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid' : user1}) + assert ret == 200 + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) + assert ret == 200 + assert len(out['entries']) == 0 + assert len(out['summary']) == 0 + + # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) + assert ret == 200 + + # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' + try: + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('five') + except boto.exception.S3ResponseError as e: + assert e.status == 403 + + # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) + assert ret == 200 + + # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('six') + + # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection' + + # create an object large enough to be split into multiple parts + test_string = 'foo'*10000000 + + big_key = boto.s3.key.Key(bucket) + big_key.set_contents_from_string(test_string) + + # now delete the head + big_key.delete() + + # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) + assert ret == 409 + + # delete should fail because ``key`` still exists + try: + bucket.delete() + except boto.exception.S3ResponseError as e: + assert e.status == 409 + + key.delete() + bucket.delete() + + # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' + bucket = connection.create_bucket(bucket_name) + + # create an object + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('seven') 
+ + # should be private already but guarantee it + key.set_acl('private') + + (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) + assert ret == 200 + assert len(out['acl']['grant_map']) == 1 + + # add another grantee by making the object public read + key.set_acl('public-read') + + (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) + assert ret == 200 + assert len(out['acl']['grant_map']) == 2 + + # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' + bucket = connection.create_bucket(bucket_name) + key_name = ['eight', 'nine', 'ten', 'eleven'] + for i in range(4): + key = boto.s3.key.Key(bucket) + key.set_contents_from_string(key_name[i]) + + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket' : bucket_name, 'purge-objects' : True}) + assert ret == 200 + + # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' + caps = 'usage=read' + (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid' : user1, 'user-caps' : caps}) + assert ret == 200 + assert out[0]['perm'] == 'read' + + # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid' : user1, 'user-caps' : caps}) + assert ret == 200 + assert not out + + # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) + assert ret == 409 + + # TESTCASE 'rm-user2', 'user', 'rm', user with data', 'succeeds' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('twelve') + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1, 'purge-data' : True}) + assert ret == 200 + + # TESTCASE 'rm-user3','user','info','deleted user','fails' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 404 + + # TESTCASE 'info' 'display info' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['info', '']) + assert ret == 200 + info = out['info'] + backends = info['storage_backends'] + name = backends[0]['name'] + fsid = backends[0]['cluster_id'] + # name is always "rados" at time of writing, but zipper would allow + # other backends, at some point + assert len(name) > 0 + # fsid is a uuid, but I'm not going to try to parse it + assert len(fsid) > 0 + + # TESTCASE 'ratelimit' 'user' 'info' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : ratelimit_user, + 'display-name' : display_name1, + 'email' : email, + 'access-key' : access_key, + 'secret-key' : secret_key, + 'max-buckets' : '1000' + }) + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user}) + assert ret == 200 + + # TESTCASE 'ratelimit' 'user' 'info' 'not existing user' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user + 'string'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'user' 'info' 'uid not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'bucket' 'info' 'succeeds' + ratelimit_bucket = 'ratelimitbucket' + connection.create_bucket(ratelimit_bucket) + (ret, out) = rgwadmin_rest(admin_conn, 
['ratelimit', 'info'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket}) + assert ret == 200 + + # TESTCASE 'ratelimit' 'bucket' 'info' 'not existing bucket' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket + 'string'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'bucket' 'info' 'bucket not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'bucket'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'global' 'info' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'global' : 'true'}) + assert ret == 200 + + # TESTCASE 'ratelimit' 'user' 'modify' 'not existing user' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user + 'string', 'enabled' : 'true'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'user' 'modify' 'uid not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'user'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'bucket' 'modify' 'not existing bucket' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket + 'string', 'enabled' : 'true'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'bucket' 'modify' 'bucket not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'enabled' : 'true'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'user' 'modifiy' 'enabled' 'max-read-bytes = 2' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user, 'enabled' : 'true', 'max-read-bytes' : '2'}) + assert ret == 200 + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user}) + assert ret == 200 + user_ratelimit = out['user_ratelimit'] + assert user_ratelimit['enabled'] == True + assert user_ratelimit['max_read_bytes'] == 2 + + # TESTCASE 'ratelimit' 'bucket' 'modifiy' 'enabled' 'max-write-bytes = 2' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket, 'enabled' : 'true', 'max-write-bytes' : '2'}) + assert ret == 200 + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket}) + assert ret == 200 + bucket_ratelimit = out['bucket_ratelimit'] + assert bucket_ratelimit['enabled'] == True + assert bucket_ratelimit['max_write_bytes'] == 2 + + # TESTCASE 'ratelimit' 'global' 'modify' 'anonymous' 'enabled' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'global': 'true', 'enabled' : 'true'}) + assert ret == 200
\ No newline at end of file diff --git a/qa/tasks/ragweed.py b/qa/tasks/ragweed.py new file mode 100644 index 000000000..e2b33527a --- /dev/null +++ b/qa/tasks/ragweed.py @@ -0,0 +1,372 @@ +""" +Run a set of s3 tests on rgw. +""" +from io import BytesIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_ragweed_branches(config, client_conf): + """ + figure out the ragweed branch according to the per-client settings + + use force-branch is specified, and fall back to the ones deduced using ceph + branch under testing + """ + force_branch = client_conf.get('force-branch', None) + if force_branch: + return [force_branch] + else: + S3_BRANCHES = ['master', 'nautilus', 'mimic', + 'luminous', 'kraken', 'jewel'] + ceph_branch = config.get('branch') + suite_branch = config.get('suite_branch', ceph_branch) + if suite_branch in S3_BRANCHES: + branch = client_conf.get('branch', 'ceph-' + suite_branch) + else: + branch = client_conf.get('branch', suite_branch) + default_branch = client_conf.get('default-branch', None) + if default_branch: + return [branch, default_branch] + else: + return [branch] + +def get_ragweed_dir(testdir, client): + return '{}/ragweed.{}'.format(testdir, client) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading ragweed...') + testdir = teuthology.get_testdir(ctx) + for (client, cconf) in config.items(): + ragweed_dir = get_ragweed_dir(testdir, client) + ragweed_repo = ctx.config.get('ragweed_repo', + teuth_config.ceph_git_base_url + 'ragweed.git') + for branch in get_ragweed_branches(ctx.config, cconf): + log.info("Using branch '%s' for ragweed", branch) + try: + ctx.cluster.only(client).sh( + script=f'git clone -b {branch} {ragweed_repo} {ragweed_dir}') + break + except Exception as e: + exc = e + else: + raise exc + + sha1 = cconf.get('sha1') + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', ragweed_dir, + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing ragweed...') + for client in config: + ragweed_dir = get_ragweed_dir(testdir, client) + ctx.cluster.only(client).run( + args=['rm', '-rf', ragweed_dir] + ) + + +def _config_user(ragweed_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + ragweed_conf[section].setdefault('user_id', user) + ragweed_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + ragweed_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + ragweed_conf[section].setdefault('access_key', ''.join(random.choice(string.ascii_uppercase) for i in range(20))) + ragweed_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)).decode('ascii')) + + +@contextlib.contextmanager +def create_users(ctx, config, run_stages): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + + for client, properties in config['config'].items(): + run_stages[client] = properties.get('stages', 'prepare,check').split(',') + + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + users = {'user regular': 'ragweed', 'user system': 'sysuser'} + for client in config['clients']: + if not 'prepare' in run_stages[client]: + # should have been prepared in a previous run + continue + + ragweed_conf = config['ragweed_conf'][client] + ragweed_conf.setdefault('fixtures', {}) + ragweed_conf['rgw'].setdefault('bucket_prefix', 'test-' + client) + for section, user in users.items(): + _config_user(ragweed_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=ragweed_conf[section]['user_id'], host=client)) + if user == 'sysuser': + sys_str = 'true' + else: + sys_str = 'false' + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'create', + '--uid', ragweed_conf[section]['user_id'], + '--display-name', ragweed_conf[section]['display_name'], + '--access-key', ragweed_conf[section]['access_key'], + '--secret', ragweed_conf[section]['secret_key'], + '--email', ragweed_conf[section]['email'], + '--system', sys_str, + ], + ) + try: + yield + finally: + for client in config['clients']: + if not 'check' in run_stages[client]: + # only remove user if went through the check stage + continue + for user in users.values(): + uid = '{user}.{client}'.format(user=user, client=client) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'rm', + '--uid', uid, + '--purge-data', + ], + ) + + +@contextlib.contextmanager +def configure(ctx, config, run_stages): + """ + Configure the local config files. 
+ """ + assert isinstance(config, dict) + log.info('Configuring ragweed...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + preparing = 'prepare' in run_stages[client] + if not preparing: + # should have been prepared in a previous run + continue + + ragweed_conf = config['ragweed_conf'][client] + if properties is not None and 'slow_backend' in properties: + ragweed_conf['fixtures']['slow backend'] = properties['slow_backend'] + + conf_fp = BytesIO() + ragweed_conf.write(conf_fp) + remote.write_file( + path='{tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + + log.info('Configuring boto...') + boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template') + for client, properties in config['clients'].items(): + with open(boto_src, 'r') as f: + (remote,) = ctx.cluster.only(client).remotes.keys() + conf = f.read().format( + idle_timeout=config.get('idle_timeout', 30) + ) + remote.write_file('{tdir}/boto.cfg'.format(tdir=testdir), conf) + + try: + yield + + finally: + log.info('Cleaning up boto...') + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', '-f', + '{tdir}/boto.cfg'.format(tdir=testdir), + ], + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def run_tests(ctx, config, run_stages): + """ + Run the ragweed after everything is set up. + + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + attrs = ["not fails_on_rgw"] + for client, client_config in config.items(): + ragweed_dir = get_ragweed_dir(testdir, client) + stages = ','.join(run_stages[client]) + args = [ + 'cd', ragweed_dir, run.Raw('&&'), + 'RAGWEED_CONF={tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client), + 'RAGWEED_STAGES={stages}'.format(stages=stages), + 'BOTO_CONFIG={tdir}/boto.cfg'.format(tdir=testdir), + 'tox', + '--sitepackages', + '--', + '-v', + '-m', ' and '.join(attrs), + ] + if client_config is not None and 'extra_args' in client_config: + args.extend(client_config['extra_args']) + + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, args, label="ragweed tests against rgw") + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the ragweed suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - ragweed: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - ragweed: [client.0] + + To run against a server on client.1 and increase the boto timeout to 10m:: + + tasks: + - ceph: + - rgw: [client.1] + - ragweed: + client.0: + rgw_server: client.1 + idle_timeout: 600 + stages: prepare,check + + To pass extra arguments to nose (e.g. 
to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - ragweed: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert hasattr(ctx, 'rgw'), 'ragweed must run after the rgw task' + assert hasattr(ctx, 'tox'), 'ragweed must run after the tox task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task ragweed only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('ragweed', {})) + + log.debug('ragweed config is %s', config) + + ragweed_conf = {} + for client in clients: + # use rgw_server endpoint if given, or default to same client + target = config[client].get('rgw_server', client) + + endpoint = ctx.rgw.role_endpoints.get(target) + assert endpoint, 'ragweed: no rgw endpoint for {}'.format(target) + + ragweed_conf[client] = ConfigObj( + indent_type='', + infile={ + 'rgw': + { + 'host' : endpoint.dns_name, + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + }, + 'fixtures' : {}, + 'user system' : {}, + 'user regular' : {}, + 'rados': + { + 'ceph_conf' : '/etc/ceph/ceph.conf', + }, + } + ) + + run_stages = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + ragweed_conf=ragweed_conf, + config=config, + ), + run_stages=run_stages), + lambda: configure(ctx=ctx, config=dict( + clients=config, + ragweed_conf=ragweed_conf, + ), + run_stages=run_stages), + lambda: run_tests(ctx=ctx, config=config, run_stages=run_stages), + ): + pass + yield diff --git a/qa/tasks/rbd.py b/qa/tasks/rbd.py new file mode 100644 index 000000000..b0ffaba83 --- /dev/null +++ b/qa/tasks/rbd.py @@ -0,0 +1,747 @@ +""" +Rbd testing task +""" +import contextlib +import logging +import os +import tempfile +import sys + +from io import StringIO +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.parallel import parallel +from teuthology.task.common_fs_utils import generic_mkfs +from teuthology.task.common_fs_utils import generic_mount +from teuthology.task.common_fs_utils import default_image_name + + +#V1 image unsupported but required for testing purposes +os.environ["RBD_FORCE_ALLOW_V1"] = "1" + +log = logging.getLogger(__name__) + +ENCRYPTION_PASSPHRASE = "password" +CLONE_ENCRYPTION_PASSPHRASE = "password2" + +@contextlib.contextmanager +def create_image(ctx, config): + """ + Create an rbd image. + + For example:: + + tasks: + - ceph: + - rbd.create_image: + client.0: + image_name: testimage + image_size: 100 + image_format: 1 + encryption_format: luks2 + client.1: + + Image size is expressed as a number of megabytes; default value + is 10240. + + Image format value must be either 1 or 2; default value is 1. 
+ + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task create_image only supports a list or dictionary for configuration" + + if isinstance(config, dict): + images = config.items() + else: + images = [(role, None) for role in config] + + testdir = teuthology.get_testdir(ctx) + passphrase_file = '{tdir}/passphrase'.format(tdir=testdir) + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + size = properties.get('image_size', 10240) + fmt = properties.get('image_format', 1) + encryption_format = properties.get('encryption_format', 'none') + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Creating image {name} with size {size}'.format(name=name, + size=size)) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '-p', 'rbd', + 'create', + '--size', str(size), + name, + ] + # omit format option if using the default (format 1) + # since old versions of don't support it + if int(fmt) != 1: + args += ['--image-format', str(fmt)] + remote.run(args=args) + + if encryption_format != 'none': + remote.run( + args=[ + 'echo', + ENCRYPTION_PASSPHRASE, + run.Raw('>'), + passphrase_file + ] + ) + remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + 'encryption', + 'format', + name, + encryption_format, + passphrase_file, + '-p', + 'rbd' + ] + ) + try: + yield + finally: + log.info('Deleting rbd images...') + remote.run(args=['rm', '-f', passphrase_file]) + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '-p', 'rbd', + 'rm', + name, + ], + ) + +@contextlib.contextmanager +def clone_image(ctx, config): + """ + Clones a parent imag + + For example:: + + tasks: + - ceph: + - rbd.clone_image: + client.0: + parent_name: testimage + image_name: cloneimage + encryption_format: luks2 + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task clone_image only supports a list or dictionary for configuration" + + if isinstance(config, dict): + images = config.items() + else: + images = [(role, None) for role in config] + + testdir = teuthology.get_testdir(ctx) + clone_passphrase_file = '{tdir}/clone-passphrase'.format(tdir=testdir) + for role, properties in images: + if properties is None: + properties = {} + + name = properties.get('image_name', default_image_name(role)) + parent_name = properties.get('parent_name') + assert parent_name is not None, \ + "parent_name is required" + parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) + + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Clone image {parent} to {child}'.format(parent=parent_name, + child=name)) + + commands = [('snap', 'create', parent_spec), + ('snap', 'protect', parent_spec), + ('clone', parent_spec, name) + ] + + encryption_format = properties.get('encryption_format', 'none') + if encryption_format != 'none': + remote.run( + args=[ + 'echo', + CLONE_ENCRYPTION_PASSPHRASE, + run.Raw('>'), + clone_passphrase_file + ] + ) + + commands.append( + ('encryption', 'format', name, encryption_format, + clone_passphrase_file) + ) + + for cmd in commands: + args = [ + 'adjust-ulimits', + 'ceph-coverage', + 
'{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', '-p', 'rbd' + ] + args.extend(cmd) + remote.run(args=args) + + try: + yield + finally: + log.info('Deleting rbd clones...') + remote.run(args=['rm', '-f', clone_passphrase_file]) + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + parent_name = properties.get('parent_name') + parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) + + (remote,) = ctx.cluster.only(role).remotes.keys() + + for cmd in [('rm', name), + ('snap', 'unprotect', parent_spec), + ('snap', 'rm', parent_spec)]: + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', '-p', 'rbd' + ] + args.extend(cmd) + remote.run(args=args) + +@contextlib.contextmanager +def modprobe(ctx, config): + """ + Load the rbd kernel module.. + + For example:: + + tasks: + - ceph: + - rbd.create_image: [client.0] + - rbd.modprobe: [client.0] + """ + log.info('Loading rbd kernel module...') + for role in config: + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'sudo', + 'modprobe', + 'rbd', + ], + ) + try: + yield + finally: + log.info('Unloading rbd kernel module...') + for role in config: + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'sudo', + 'modprobe', + '-r', + 'rbd', + # force errors to be ignored; necessary if more + # than one device was created, which may mean + # the module isn't quite ready to go the first + # time through. + run.Raw('||'), + 'true', + ], + ) + +@contextlib.contextmanager +def dev_create(ctx, config): + """ + Map block devices to rbd images. + + For example:: + + tasks: + - ceph: + - rbd.create_image: [client.0] + - rbd.modprobe: [client.0] + - rbd.dev_create: + client.0: + image_name: testimage.client.0 + encryption_format: luks2 + parent_encryption_format: luks1 + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task dev_create only supports a list or dictionary for configuration" + + if isinstance(config, dict): + images = config.items() + else: + images = [(role, None) for role in config] + + log.info('Creating rbd block devices...') + + testdir = teuthology.get_testdir(ctx) + passphrase_file = '{tdir}/passphrase'.format(tdir=testdir) + clone_passphrase_file = '{tdir}/clone-passphrase'.format(tdir=testdir) + device_path = {} + + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + parent_encryption_format = properties.get('parent_encryption_format', + 'none') + encryption_format = properties.get('encryption_format', + parent_encryption_format) + (remote,) = ctx.cluster.only(role).remotes.keys() + + if encryption_format == 'none' and parent_encryption_format == 'none': + device_path[role] = '/dev/rbd/rbd/{image}'.format(image=name) + device_specific_args = [] + else: + device_specific_args = ['-t', 'nbd', '-o'] + + is_cloned = properties.get('parent_name') is not None + encryption_args = "" + if is_cloned and properties.get('encryption_format') != 'none': + remote.run( + args=[ + 'echo', + CLONE_ENCRYPTION_PASSPHRASE, + run.Raw('>'), + clone_passphrase_file + ] + ) + + encryption_args = \ + 'encryption-format=%s,encryption-passphrase-file=%s' % ( + encryption_format, clone_passphrase_file) + + if not is_cloned or parent_encryption_format != 'none': + remote.run( + args=[ + 'echo', + ENCRYPTION_PASSPHRASE, + run.Raw('>'), + passphrase_file + 
] + ) + + if is_cloned and properties.get('encryption_format') != 'none': + encryption_args += "," + + if parent_encryption_format != 'none': + encryption_args += \ + 'encryption-format=%s,encryption-passphrase-file=%s' % ( + parent_encryption_format, passphrase_file) + else: + encryption_args += \ + 'encryption-format=%s,encryption-passphrase-file=%s' % ( + encryption_format, passphrase_file) + + device_specific_args.append(encryption_args) + + map_fp = StringIO() + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '--id', role.rsplit('.')[-1], + '-p', 'rbd', + 'map', + name] + device_specific_args, + stdout=map_fp, + ) + + if encryption_format != 'none' or parent_encryption_format != 'none': + device_path[role] = map_fp.getvalue().rstrip() + properties['device_path'] = device_path[role] + remote.run(args=['sudo', 'chmod', '666', device_path[role]]) + try: + yield + finally: + log.info('Unmapping rbd devices...') + remote.run(args=['rm', '-f', passphrase_file, clone_passphrase_file]) + for role, properties in images: + if not device_path.get(role): + continue + + if properties is None: + properties = {} + encryption_format = properties.get('encryption_format', 'none') + parent_encryption_format = properties.get( + 'parent_encryption_format', 'none') + (remote,) = ctx.cluster.only(role).remotes.keys() + + if encryption_format == 'none' and \ + parent_encryption_format == 'none': + device_specific_args = [] + else: + device_specific_args = ['-t', 'nbd'] + + remote.run( + args=[ + 'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '-p', 'rbd', + 'unmap', + device_path[role], + ] + device_specific_args, + ) + + +def rbd_devname_rtn(ctx, image): + return '/dev/rbd/rbd/{image}'.format(image=image) + +def canonical_path(ctx, role, path): + """ + Determine the canonical path for a given path on the host + representing the given role. A canonical path contains no + . or .. components, and includes no symbolic links. + """ + version_fp = StringIO() + ctx.cluster.only(role).run( + args=[ 'readlink', '-f', path ], + stdout=version_fp, + ) + canonical_path = version_fp.getvalue().rstrip('\n') + version_fp.close() + return canonical_path + +@contextlib.contextmanager +def run_xfstests(ctx, config): + """ + Run xfstests over specified devices. + + Warning: both the test and scratch devices specified will be + overwritten. Normally xfstests modifies (but does not destroy) + the test device, but for now the run script used here re-makes + both filesystems. + + Note: Only one instance of xfstests can run on a single host at + a time, although this is not enforced. + + This task in its current form needs some improvement. For + example, it assumes all roles provided in the config are + clients, and that the config provided is a list of key/value + pairs. For now please use the xfstests() interface, below. 
+ + For example:: + + tasks: + - ceph: + - rbd.run_xfstests: + client.0: + count: 2 + test_dev: 'test_dev' + scratch_dev: 'scratch_dev' + fs_type: 'xfs' + tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' + exclude: + - generic/42 + randomize: true + """ + with parallel() as p: + for role, properties in config.items(): + p.spawn(run_xfstests_one_client, ctx, role, properties) + exc = None + while True: + try: + p.next() + except StopIteration: + break + except: + exc = sys.exc_info()[1] + if exc is not None: + raise exc + yield + +def run_xfstests_one_client(ctx, role, properties): + """ + Spawned routine to handle xfs tests for a single client + """ + testdir = teuthology.get_testdir(ctx) + try: + count = properties.get('count') + test_dev = properties.get('test_dev') + assert test_dev is not None, \ + "task run_xfstests requires test_dev to be defined" + test_dev = canonical_path(ctx, role, test_dev) + + scratch_dev = properties.get('scratch_dev') + assert scratch_dev is not None, \ + "task run_xfstests requires scratch_dev to be defined" + scratch_dev = canonical_path(ctx, role, scratch_dev) + + fs_type = properties.get('fs_type') + tests = properties.get('tests') + exclude_list = properties.get('exclude') + randomize = properties.get('randomize') + + (remote,) = ctx.cluster.only(role).remotes.keys() + + # Fetch the test script + test_root = teuthology.get_testdir(ctx) + test_script = 'run_xfstests.sh' + test_path = os.path.join(test_root, test_script) + + xfstests_url = properties.get('xfstests_url') + assert xfstests_url is not None, \ + "task run_xfstests requires xfstests_url to be defined" + + xfstests_krbd_url = xfstests_url + '/' + test_script + + log.info('Fetching {script} for {role} from {url}'.format( + script=test_script, + role=role, + url=xfstests_krbd_url)) + + args = [ 'wget', '-O', test_path, '--', xfstests_krbd_url ] + remote.run(args=args) + + log.info('Running xfstests on {role}:'.format(role=role)) + log.info(' iteration count: {count}:'.format(count=count)) + log.info(' test device: {dev}'.format(dev=test_dev)) + log.info(' scratch device: {dev}'.format(dev=scratch_dev)) + log.info(' using fs_type: {fs_type}'.format(fs_type=fs_type)) + log.info(' tests to run: {tests}'.format(tests=tests)) + log.info(' exclude list: {}'.format(' '.join(exclude_list))) + log.info(' randomize: {randomize}'.format(randomize=randomize)) + + if exclude_list: + with tempfile.NamedTemporaryFile(mode='w', prefix='exclude') as exclude_file: + for test in exclude_list: + exclude_file.write("{}\n".format(test)) + exclude_file.flush() + remote.put_file(exclude_file.name, exclude_file.name) + + # Note that the device paths are interpreted using + # readlink -f <path> in order to get their canonical + # pathname (so it matches what the kernel remembers). + args = [ + '/usr/bin/sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '/bin/bash', + test_path, + '-c', str(count), + '-f', fs_type, + '-t', test_dev, + '-s', scratch_dev, + ] + if exclude_list: + args.extend(['-x', exclude_file.name]) + if randomize: + args.append('-r') + if tests: + args.extend(['--', tests]) + remote.run(args=args, logger=log.getChild(role)) + finally: + log.info('Removing {script} on {role}'.format(script=test_script, + role=role)) + remote.run(args=['rm', '-f', test_path]) + +@contextlib.contextmanager +def xfstests(ctx, config): + """ + Run xfstests over rbd devices. 
This interface sets up all + required configuration automatically if not otherwise specified. + Note that only one instance of xfstests can run on a single host + at a time. By default, the set of tests specified is run once. + If a (non-zero) count value is supplied, the complete set of + tests will be run that number of times. + + For example:: + + tasks: + - ceph: + # Image sizes are in MB + - rbd.xfstests: + client.0: + count: 3 + test_image: 'test_image' + test_size: 250 + test_format: 2 + scratch_image: 'scratch_image' + scratch_size: 250 + scratch_format: 1 + fs_type: 'xfs' + tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' + exclude: + - generic/42 + randomize: true + xfstests_url: 'https://raw.github.com/ceph/ceph-ci/wip-55555/qa' + """ + if config is None: + config = { 'all': None } + assert isinstance(config, dict) or isinstance(config, list), \ + "task xfstests only supports a list or dictionary for configuration" + if isinstance(config, dict): + config = teuthology.replace_all_with_clients(ctx.cluster, config) + runs = config.items() + else: + runs = [(role, None) for role in config] + + running_xfstests = {} + for role, properties in runs: + assert role.startswith('client.'), \ + "task xfstests can only run on client nodes" + for host, roles_for_host in ctx.cluster.remotes.items(): + if role in roles_for_host: + assert host not in running_xfstests, \ + "task xfstests allows only one instance at a time per host" + running_xfstests[host] = True + + images_config = {} + scratch_config = {} + modprobe_config = {} + image_map_config = {} + scratch_map_config = {} + xfstests_config = {} + for role, properties in runs: + if properties is None: + properties = {} + + test_image = properties.get('test_image', 'test_image.{role}'.format(role=role)) + test_size = properties.get('test_size', 10000) # 10G + test_fmt = properties.get('test_format', 1) + scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role)) + scratch_size = properties.get('scratch_size', 10000) # 10G + scratch_fmt = properties.get('scratch_format', 1) + + images_config[role] = dict( + image_name=test_image, + image_size=test_size, + image_format=test_fmt, + ) + + scratch_config[role] = dict( + image_name=scratch_image, + image_size=scratch_size, + image_format=scratch_fmt, + ) + + xfstests_branch = properties.get('xfstests_branch', 'master') + xfstests_url = properties.get('xfstests_url', 'https://raw.github.com/ceph/ceph/{branch}/qa'.format(branch=xfstests_branch)) + + xfstests_config[role] = dict( + count=properties.get('count', 1), + test_dev='/dev/rbd/rbd/{image}'.format(image=test_image), + scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image), + fs_type=properties.get('fs_type', 'xfs'), + randomize=properties.get('randomize', False), + tests=properties.get('tests'), + exclude=properties.get('exclude', []), + xfstests_url=xfstests_url, + ) + + log.info('Setting up xfstests using RBD images:') + log.info(' test ({size} MB): {image}'.format(size=test_size, + image=test_image)) + log.info(' scratch ({size} MB): {image}'.format(size=scratch_size, + image=scratch_image)) + modprobe_config[role] = None + image_map_config[role] = {'image_name': test_image} + scratch_map_config[role] = {'image_name': scratch_image} + + with contextutil.nested( + lambda: create_image(ctx=ctx, config=images_config), + lambda: create_image(ctx=ctx, config=scratch_config), + lambda: modprobe(ctx=ctx, config=modprobe_config), + lambda: dev_create(ctx=ctx, config=image_map_config), + lambda: 
dev_create(ctx=ctx, config=scratch_map_config), + lambda: run_xfstests(ctx=ctx, config=xfstests_config), + ): + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Create and mount an rbd image. + + For example, you can specify which clients to run on:: + + tasks: + - ceph: + - rbd: [client.0, client.1] + + There are a few image options:: + + tasks: + - ceph: + - rbd: + client.0: # uses defaults + client.1: + image_name: foo + image_size: 2048 + image_format: 2 + fs_type: xfs + + To use default options on all clients:: + + tasks: + - ceph: + - rbd: + all: + + To create 20GiB images and format them with xfs on all clients:: + + tasks: + - ceph: + - rbd: + all: + image_size: 20480 + fs_type: xfs + """ + if config is None: + config = { 'all': None } + norm_config = config + if isinstance(config, dict): + norm_config = teuthology.replace_all_with_clients(ctx.cluster, config) + if isinstance(norm_config, dict): + role_images = {} + for role, properties in norm_config.items(): + if properties is None: + properties = {} + role_images[role] = properties.get('image_name') + else: + role_images = norm_config + + log.debug('rbd config is: %s', norm_config) + + with contextutil.nested( + lambda: create_image(ctx=ctx, config=norm_config), + lambda: modprobe(ctx=ctx, config=norm_config), + lambda: dev_create(ctx=ctx, config=norm_config), + lambda: generic_mkfs(ctx=ctx, config=norm_config, + devname_rtn=rbd_devname_rtn), + lambda: generic_mount(ctx=ctx, config=role_images, + devname_rtn=rbd_devname_rtn), + ): + yield diff --git a/qa/tasks/rbd_fio.py b/qa/tasks/rbd_fio.py new file mode 100644 index 000000000..959d07d49 --- /dev/null +++ b/qa/tasks/rbd_fio.py @@ -0,0 +1,225 @@ +""" + Long running fio tests on rbd mapped devices for format/features provided in config + Many fio parameters can be configured so that this task can be used along with thrash/power-cut tests + and exercise IO on full disk for all format/features + - This test should not be run on VM due to heavy use of resource + +""" +import contextlib +import json +import logging +import os + +from teuthology.parallel import parallel +from teuthology import misc as teuthology +from tempfile import NamedTemporaryFile +from teuthology.orchestra import run +from teuthology.packaging import install_package, remove_package + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + client.0: + fio-io-size: 100g or 80% or 100m + fio-version: 2.2.9 + formats: [2] + features: [[layering],[striping],[layering,exclusive-lock,object-map]] + test-clone-io: 1 #remove this option to not run create rbd clone and not run io on clone + io-engine: "sync or rbd or any io-engine" + rw: randrw + client.1: + fio-io-size: 100g + fio-version: 2.2.9 + rw: read + image-size:20480 + +or + all: + fio-io-size: 400g + rw: randrw + formats: [2] + features: [[layering],[striping]] + io-engine: libaio + + Create rbd image + device and exercise IO for format/features provided in config file + Config can be per client or one config can be used for all clients, fio jobs are run in parallel for client provided + + """ + if config.get('all'): + client_config = config['all'] + clients = ctx.cluster.only(teuthology.is_type('client')) + rbd_test_dir = teuthology.get_testdir(ctx) + "/rbd_fio_test" + for remote,role in clients.remotes.items(): + if 'client_config' in locals(): + with parallel() as p: + p.spawn(run_fio, remote, client_config, rbd_test_dir) + else: + for client_config in config: + if client_config in role: + with 
parallel() as p: + p.spawn(run_fio, remote, config[client_config], rbd_test_dir) + + yield + + +def get_ioengine_package_name(ioengine, remote): + system_type = teuthology.get_system_type(remote) + if ioengine == 'rbd': + return 'librbd1-devel' if system_type == 'rpm' else 'librbd-dev' + elif ioengine == 'libaio': + return 'libaio-devel' if system_type == 'rpm' else 'libaio-dev' + else: + return None + + +def run_rbd_map(remote, image, iodepth): + iodepth = max(iodepth, 128) # RBD_QUEUE_DEPTH_DEFAULT + dev = remote.sh(['sudo', 'rbd', 'device', 'map', '-o', + 'queue_depth={}'.format(iodepth), image]).rstrip('\n') + remote.sudo_write_file( + '/sys/block/{}/queue/nr_requests'.format(os.path.basename(dev)), + str(iodepth)) + return dev + + +def run_fio(remote, config, rbd_test_dir): + """ + create fio config file with options based on above config + get the fio from github, generate binary, and use it to run on + the generated fio config file + """ + fio_config=NamedTemporaryFile(mode='w', prefix='fio_rbd_', dir='/tmp/', delete=False) + fio_config.write('[global]\n') + if config.get('io-engine'): + ioengine=config['io-engine'] + fio_config.write('ioengine={ioe}\n'.format(ioe=ioengine)) + else: + fio_config.write('ioengine=sync\n') + if config.get('bs'): + bs=config['bs'] + fio_config.write('bs={bs}\n'.format(bs=bs)) + else: + fio_config.write('bs=4k\n') + iodepth = config.get('io-depth', 2) + fio_config.write('iodepth={iod}\n'.format(iod=iodepth)) + if config.get('fio-io-size'): + size=config['fio-io-size'] + fio_config.write('size={size}\n'.format(size=size)) + else: + fio_config.write('size=100m\n') + + fio_config.write('time_based\n') + if config.get('runtime'): + runtime=config['runtime'] + fio_config.write('runtime={runtime}\n'.format(runtime=runtime)) + else: + fio_config.write('runtime=1800\n') + fio_config.write('allow_file_create=0\n') + image_size=10240 + if config.get('image_size'): + image_size=config['image_size'] + + formats=[1,2] + features=[['layering'],['striping'],['exclusive-lock','object-map']] + fio_version='3.32' + if config.get('formats'): + formats=config['formats'] + if config.get('features'): + features=config['features'] + if config.get('fio-version'): + fio_version=config['fio-version'] + + # handle package required for ioengine, if any + sn=remote.shortname + ioengine_pkg = get_ioengine_package_name(ioengine, remote) + if ioengine_pkg: + install_package(ioengine_pkg, remote) + + fio_config.write('norandommap\n') + if ioengine == 'rbd': + fio_config.write('clientname=admin\n') + fio_config.write('pool=rbd\n') + fio_config.write('invalidate=0\n') + elif ioengine == 'libaio': + fio_config.write('direct=1\n') + for frmt in formats: + for feature in features: + log.info("Creating rbd images on {sn}".format(sn=sn)) + feature_name = '-'.join(feature) + rbd_name = 'i{i}f{f}{sn}'.format(i=frmt,f=feature_name,sn=sn) + rbd_snap_name = 'i{i}f{f}{sn}@i{i}f{f}{sn}Snap'.format(i=frmt,f=feature_name,sn=sn) + rbd_clone_name = 'i{i}f{f}{sn}Clone'.format(i=frmt,f=feature_name,sn=sn) + create_args=['rbd', 'create', + '--size', '{size}'.format(size=image_size), + '--image', rbd_name, + '--image-format', '{f}'.format(f=frmt)] + map(lambda x: create_args.extend(['--image-feature', x]), feature) + if config.get('thick-provision'): + create_args.append('--thick-provision') + remote.run(args=create_args) + remote.run(args=['rbd', 'info', rbd_name]) + if ioengine != 'rbd': + rbd_dev = run_rbd_map(remote, rbd_name, iodepth) + if config.get('test-clone-io'): + log.info("Testing clones using 
fio") + remote.run(args=['rbd', 'snap', 'create', rbd_snap_name]) + remote.run(args=['rbd', 'snap', 'protect', rbd_snap_name]) + remote.run(args=['rbd', 'clone', rbd_snap_name, rbd_clone_name]) + rbd_clone_dev = run_rbd_map(remote, rbd_clone_name, iodepth) + fio_config.write('[{rbd_dev}]\n'.format(rbd_dev=rbd_dev)) + if config.get('rw'): + rw=config['rw'] + fio_config.write('rw={rw}\n'.format(rw=rw)) + else: + fio_config .write('rw=randrw\n') + fio_config.write('filename={rbd_dev}\n'.format(rbd_dev=rbd_dev)) + if config.get('test-clone-io'): + fio_config.write('[{rbd_clone_dev}]\n'.format(rbd_clone_dev=rbd_clone_dev)) + fio_config.write('rw={rw}\n'.format(rw=rw)) + fio_config.write('filename={rbd_clone_dev}\n'.format(rbd_clone_dev=rbd_clone_dev)) + else: + if config.get('test-clone-io'): + log.info("Testing clones using fio") + remote.run(args=['rbd', 'snap', 'create', rbd_snap_name]) + remote.run(args=['rbd', 'snap', 'protect', rbd_snap_name]) + remote.run(args=['rbd', 'clone', rbd_snap_name, rbd_clone_name]) + fio_config.write('[{img_name}]\n'.format(img_name=rbd_name)) + if config.get('rw'): + rw=config['rw'] + fio_config.write('rw={rw}\n'.format(rw=rw)) + else: + fio_config.write('rw=randrw\n') + fio_config.write('rbdname={img_name}\n'.format(img_name=rbd_name)) + if config.get('test-clone-io'): + fio_config.write('[{clone_img_name}]\n'.format(clone_img_name=rbd_clone_name)) + fio_config.write('rw={rw}\n'.format(rw=rw)) + fio_config.write('rbdname={clone_img_name}\n'.format(clone_img_name=rbd_clone_name)) + + + fio_config.close() + remote.put_file(fio_config.name,fio_config.name) + try: + log.info("Running rbd feature - fio test on {sn}".format(sn=sn)) + fio = "https://github.com/axboe/fio/archive/fio-" + fio_version + ".tar.gz" + remote.run(args=['mkdir', run.Raw(rbd_test_dir),]) + remote.run(args=['cd' , run.Raw(rbd_test_dir), + run.Raw(';'), 'wget', fio, run.Raw(';'), run.Raw('tar -xvf fio*tar.gz'), run.Raw(';'), + run.Raw('cd fio-fio*'), run.Raw(';'), './configure', run.Raw(';'), 'make']) + remote.run(args=['ceph', '-s']) + remote.run(args=[run.Raw('{tdir}/fio-fio-{v}/fio --showcmd {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))]) + remote.run(args=['sudo', run.Raw('{tdir}/fio-fio-{v}/fio {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))]) + remote.run(args=['ceph', '-s']) + finally: + out = remote.sh('rbd device list --format=json') + mapped_images = json.loads(out) + if mapped_images: + log.info("Unmapping rbd images on {sn}".format(sn=sn)) + for image in mapped_images: + remote.run(args=['sudo', 'rbd', 'device', 'unmap', + str(image['device'])]) + log.info("Cleaning up fio install") + remote.run(args=['rm','-rf', run.Raw(rbd_test_dir)]) + if ioengine_pkg: + remove_package(ioengine_pkg, remote) diff --git a/qa/tasks/rbd_fsx.py b/qa/tasks/rbd_fsx.py new file mode 100644 index 000000000..efea7208e --- /dev/null +++ b/qa/tasks/rbd_fsx.py @@ -0,0 +1,115 @@ +""" +Run fsx on an rbd image +""" +import contextlib +import logging + +from teuthology.exceptions import ConfigError +from teuthology.parallel import parallel +from teuthology import misc as teuthology +from tasks.ceph_manager import get_valgrind_args + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run fsx on an rbd image. + + Currently this requires running as client.admin + to create a pool. 
+ + Specify which clients to run on as a list:: + + tasks: + ceph: + rbd_fsx: + clients: [client.0, client.1] + + You can optionally change some properties of fsx: + + tasks: + ceph: + rbd_fsx: + clients: <list of clients> + seed: <random seed number, or 0 to use the time> + ops: <number of operations to do> + size: <maximum image size in bytes> + valgrind: [--tool=<valgrind tool>] + """ + log.info('starting rbd_fsx...') + with parallel() as p: + for role in config['clients']: + p.spawn(_run_one_client, ctx, config, role) + yield + +def _run_one_client(ctx, config, role): + """Spawned task that runs the client""" + krbd = config.get('krbd', False) + nbd = config.get('nbd', False) + testdir = teuthology.get_testdir(ctx) + (remote,) = ctx.cluster.only(role).remotes.keys() + + args = [] + if krbd or nbd: + args.append('sudo') # rbd(-nbd) map/unmap need privileges + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir) + ]) + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('rbd_fsx', {})) + + if config.get('valgrind'): + args = get_valgrind_args( + testdir, + 'fsx_{id}'.format(id=role), + args, + config.get('valgrind') + ) + + cluster_name, type_, client_id = teuthology.split_role(role) + if type_ != 'client': + msg = 'client role ({0}) must be a client'.format(role) + raise ConfigError(msg) + + args.extend([ + 'ceph_test_librbd_fsx', + '--cluster', cluster_name, + '--id', client_id, + '-d', # debug output for all operations + '-W', '-R', # mmap doesn't work with rbd + '-p', str(config.get('progress_interval', 100)), # show progress + '-P', '{tdir}/archive'.format(tdir=testdir), + '-r', str(config.get('readbdy',1)), + '-w', str(config.get('writebdy',1)), + '-t', str(config.get('truncbdy',1)), + '-h', str(config.get('holebdy',1)), + '-l', str(config.get('size', 250000000)), + '-S', str(config.get('seed', 0)), + '-N', str(config.get('ops', 1000)), + ]) + if krbd: + args.append('-K') # -K enables krbd mode + if nbd: + args.append('-M') # -M enables nbd mode + if config.get('direct_io', False): + args.append('-Z') # -Z use direct IO + if not config.get('randomized_striping', True): + args.append('-U') # -U disables randomized striping + if not config.get('punch_holes', True): + args.append('-H') # -H disables discard ops + if config.get('deep_copy', False): + args.append('-g') # -g deep copy instead of clone + if config.get('journal_replay', False): + args.append('-j') # -j replay all IO events from journal + if config.get('keep_images', False): + args.append('-k') # -k keep images on success + args.extend([ + config.get('pool_name', 'pool_{pool}'.format(pool=role)), + 'image_{image}'.format(image=role), + ]) + + remote.run(args=args) diff --git a/qa/tasks/rbd_mirror.py b/qa/tasks/rbd_mirror.py new file mode 100644 index 000000000..5da252560 --- /dev/null +++ b/qa/tasks/rbd_mirror.py @@ -0,0 +1,120 @@ +""" +Task for running rbd mirroring daemons and configuring mirroring +""" + +import logging + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.task import Task +from tasks.ceph_manager import get_valgrind_args +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + + +class RBDMirror(Task): + """ + Run an rbd-mirror daemon to sync rbd images between clusters. + + This requires two clients (one from each cluster) on the same host + to connect with. 
The pool configuration should be adjusted by later + test scripts to include the remote client and cluster name. This task + just needs to know how to connect to the local cluster. + + For example: + + roles: + - [primary.mon.a, primary.osd.0, primary.osd.1, primary.osd.2] + - [secondary.mon.a, secondary.osd.0, secondary.osd.1, secondary.osd.2] + - [primary.client.mirror, secondary.client.mirror] + tasks: + - ceph: + cluster: primary + - ceph: + cluster: secondary + - rbd-mirror: + client: primary.client.mirror + + To mirror back to the primary cluster as well, add another + rbd_mirror instance: + + - rbd-mirror: + client: secondary.client.mirror + + Possible options for this task are: + + client: role - ceph client to connect as + valgrind: [--tool=<valgrind tool>] - none by default + coverage: bool - whether this run may be collecting coverage data + thrash: bool - whether this run may be thrashed + """ + def __init__(self, ctx, config): + super(RBDMirror, self).__init__(ctx, config) + self.log = log + + def setup(self): + super(RBDMirror, self).setup() + try: + self.client = self.config['client'] + except KeyError: + raise ConfigError('rbd-mirror requires a client to connect with') + + self.cluster_name, type_, self.client_id = misc.split_role(self.client) + + if type_ != 'client': + msg = 'client role ({0}) must be a client'.format(self.client) + raise ConfigError(msg) + + self.remote = get_remote_for_role(self.ctx, self.client) + + def begin(self): + super(RBDMirror, self).begin() + testdir = misc.get_testdir(self.ctx) + daemon_signal = 'kill' + if 'coverage' in self.config or 'valgrind' in self.config or \ + self.config.get('thrash', False): + daemon_signal = 'term' + + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + daemon_signal, + ] + + if 'valgrind' in self.config: + args = get_valgrind_args( + testdir, + 'rbd-mirror-{id}'.format(id=self.client), + args, + self.config.get('valgrind') + ) + + args.extend([ + 'rbd-mirror', '--foreground', + '--cluster', + self.cluster_name, + '--id', + self.client_id, + ]) + + self.ctx.daemons.add_daemon( + self.remote, 'rbd-mirror', self.client, + cluster=self.cluster_name, + args=args, + logger=self.log.getChild(self.client), + stdin=run.PIPE, + wait=False, + ) + + def end(self): + mirror_daemon = self.ctx.daemons.get_daemon('rbd-mirror', + self.client, + self.cluster_name) + mirror_daemon.stop() + super(RBDMirror, self).end() + +task = RBDMirror diff --git a/qa/tasks/rbd_mirror_thrash.py b/qa/tasks/rbd_mirror_thrash.py new file mode 100644 index 000000000..a42d19e70 --- /dev/null +++ b/qa/tasks/rbd_mirror_thrash.py @@ -0,0 +1,218 @@ +""" +Task for thrashing rbd-mirror daemons +""" + +import contextlib +import logging +import random +import signal +import socket +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + + +class RBDMirrorThrasher(Thrasher, Greenlet): + """ + RBDMirrorThrasher:: + + The RBDMirrorThrasher thrashes rbd-mirror daemons during execution of other + tasks (workunits, etc). + + The config is optional. Many of the config parameters are a maximum value + to use when selecting a random value from a range. 
The config is a dict + containing some or all of: + + cluster: [default: ceph] cluster to thrash + + max_thrash: [default: 1] the maximum number of active rbd-mirror daemons per + cluster will be thrashed at any given time. + + min_thrash_delay: [default: 60] minimum number of seconds to delay before + thrashing again. + + max_thrash_delay: [default: 120] maximum number of seconds to delay before + thrashing again. + + max_revive_delay: [default: 10] maximum number of seconds to delay before + bringing back a thrashed rbd-mirror daemon. + + randomize: [default: true] enables randomization and use the max/min values + + seed: [no default] seed the random number generator + + Examples:: + + The following example disables randomization, and uses the max delay + values: + + tasks: + - ceph: + - rbd_mirror_thrash: + randomize: False + max_thrash_delay: 10 + """ + + def __init__(self, ctx, config, cluster, daemons): + super(RBDMirrorThrasher, self).__init__() + + self.ctx = ctx + self.config = config + self.cluster = cluster + self.daemons = daemons + + self.logger = log + self.name = 'thrasher.rbd_mirror.[{cluster}]'.format(cluster = cluster) + self.stopping = Event() + + self.randomize = bool(self.config.get('randomize', True)) + self.max_thrash = int(self.config.get('max_thrash', 1)) + self.min_thrash_delay = float(self.config.get('min_thrash_delay', 60.0)) + self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0)) + self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0)) + + def _run(self): + try: + self.do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. 
+ + def log(self, x): + """Write data to logger assigned to this RBDMirrorThrasher""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def do_thrash(self): + """ + Perform the random thrashing action + """ + + self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster)) + stats = { + "kill": 0, + } + + while not self.stopping.is_set(): + delay = self.max_thrash_delay + if self.randomize: + delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay) + + if delay > 0.0: + self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) + self.stopping.wait(delay) + if self.stopping.is_set(): + continue + + killed_daemons = [] + + weight = 1.0 / len(self.daemons) + count = 0 + for daemon in self.daemons: + skip = random.uniform(0.0, 1.0) + if weight <= skip: + self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format( + label=daemon.id_, skip=skip, weight=weight)) + continue + + self.log('kill {label}'.format(label=daemon.id_)) + try: + daemon.signal(signal.SIGTERM) + except socket.error: + pass + killed_daemons.append(daemon) + stats['kill'] += 1 + + # if we've reached max_thrash, we're done + count += 1 + if count >= self.max_thrash: + break + + if killed_daemons: + # wait for a while before restarting + delay = self.max_revive_delay + if self.randomize: + delay = random.randrange(0.0, self.max_revive_delay) + + self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay)) + sleep(delay) + + for daemon in killed_daemons: + self.log('waiting for {label}'.format(label=daemon.id_)) + try: + run.wait([daemon.proc], timeout=600) + except CommandFailedError: + pass + except: + self.log('Failed to stop {label}'.format(label=daemon.id_)) + + try: + # try to capture a core dump + daemon.signal(signal.SIGABRT) + except socket.error: + pass + raise + finally: + daemon.reset() + + for daemon in killed_daemons: + self.log('reviving {label}'.format(label=daemon.id_)) + daemon.start() + + for stat in stats: + self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat])) + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the rbd-mirror by thrashing while another task/workunit + is running. + + Please refer to RBDMirrorThrasher class for further information on the + available options. 
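+
+    Each thrash cycle walks the rbd-mirror daemons in order and kills each one
+    with probability 1/len(daemons), stopping once max_thrash daemons have
+    been killed in that cycle; with three daemons and max_thrash: 1, for
+    example, each daemon has roughly a one-in-three chance of being picked.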
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'rbd_mirror_thrash task only accepts a dict for configuration' + + cluster = config.get('cluster', 'ceph') + daemons = list(ctx.daemons.iter_daemons_of_role('rbd-mirror', cluster)) + assert len(daemons) > 0, \ + 'rbd_mirror_thrash task requires at least 1 rbd-mirror daemon' + + # choose random seed + if 'seed' in config: + seed = int(config['seed']) + else: + seed = int(time.time()) + log.info('rbd_mirror_thrash using random seed: {seed}'.format(seed=seed)) + random.seed(seed) + + thrasher = RBDMirrorThrasher(ctx, config, cluster, daemons) + thrasher.start() + ctx.ceph[cluster].thrashers.append(thrasher) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining rbd_mirror_thrash') + thrasher.stop() + if thrasher.exception is not None: + raise RuntimeError('error during thrashing') + thrasher.join() + log.info('done joining') diff --git a/qa/tasks/rbd_pwl_cache_recovery.py b/qa/tasks/rbd_pwl_cache_recovery.py new file mode 100644 index 000000000..e13c1f664 --- /dev/null +++ b/qa/tasks/rbd_pwl_cache_recovery.py @@ -0,0 +1,96 @@ +""" +persistent write log cache recovery task +""" +import contextlib +import logging +import random +import json +import time + +from teuthology import misc as teuthology +from teuthology import contextutil + +DEFAULT_NUM_ITERATIONS = 20 +IO_PATTERNS = ("full-seq", "rand") +IO_SIZES = ('4K', '16K', '128K', '1024K') + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def thrashes_rbd_bench_on_persistent_cache(ctx, config): + """ + thrashes rbd bench on persistent write log cache. + It can test recovery feature of persistent write log cache. + """ + log.info("thrashes rbd bench on persistent write log cache") + + client, client_config = list(config.items())[0] + (remote,) = ctx.cluster.only(client).remotes.keys() + client_config = client_config if client_config is not None else dict() + image_name = client_config.get('image_name', 'testimage') + num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS) + + for i in range(num_iterations): + log.info("start rbd bench") + # rbd bench could not specify the run time so set a large enough test size. 
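+        # The args below compose a command equivalent to, for example:
+        #   rbd bench --io-type write --io-pattern rand --io-size 16K \
+        #       --io-total 100G testimage
+        # with the pattern and size picked at random from IO_PATTERNS/IO_SIZES
+        # and the image name taken from the client config.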
+ remote.run( + args=[ + 'rbd', 'bench', + '--io-type', 'write', + '--io-pattern', random.choice(IO_PATTERNS), + '--io-size', random.choice(IO_SIZES), + '--io-total', '100G', + image_name, + ], + wait=False, + ) + # Wait a few seconds for the rbd bench process to run + # and complete the pwl cache initialization + time.sleep(10) + log.info("dump cache state when rbd bench running.") + remote.sh(['rbd', 'status', image_name, '--format=json']) + log.info("sleep...") + time.sleep(random.randint(10, 60)) + log.info("rbd bench crash.") + remote.run( + args=[ + 'killall', '-9', 'rbd', + ], + check_status=False, + ) + log.info("wait for watch timeout.") + time.sleep(40) + log.info("check cache state after crash.") + out = remote.sh(['rbd', 'status', image_name, '--format=json']) + rbd_status = json.loads(out) + assert len(rbd_status['watchers']) == 0 + assert rbd_status['persistent_cache']['present'] == True + assert rbd_status['persistent_cache']['empty'] == False + assert rbd_status['persistent_cache']['clean'] == False + log.info("check dirty cache file.") + remote.run( + args=[ + 'test', '-e', rbd_status['persistent_cache']['path'], + ] + ) + try: + yield + finally: + log.info("cleanup") + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for testing persistent write log cache recovery. + """ + assert isinstance(config, dict), \ + "task rbd_pwl_cache_recovery only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: thrashes_rbd_bench_on_persistent_cache(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/rebuild_mondb.py b/qa/tasks/rebuild_mondb.py new file mode 100644 index 000000000..bbf6383b1 --- /dev/null +++ b/qa/tasks/rebuild_mondb.py @@ -0,0 +1,228 @@ +""" +Test if we can recover the leveldb from OSD after where all leveldbs are +corrupted +""" + +import logging +import os.path +import shutil +import tempfile + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + + +def _push_directory(path, remote, remote_dir): + """ + local_temp_path=`mktemp` + tar czf $local_temp_path $path + ssh remote mkdir -p remote_dir + remote_temp_path=`mktemp` + scp $local_temp_path $remote_temp_path + rm $local_temp_path + tar xzf $remote_temp_path -C $remote_dir + ssh remote:$remote_temp_path + """ + fd, local_temp_path = tempfile.mkstemp(suffix='.tgz', + prefix='rebuild_mondb-') + os.close(fd) + cmd = ' '.join(['tar', 'cz', + '-f', local_temp_path, + '-C', path, + '--', '.']) + teuthology.sh(cmd) + _, fname = os.path.split(local_temp_path) + fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz', + prefix='rebuild_mondb-') + os.close(fd) + remote.put_file(local_temp_path, remote_temp_path) + os.remove(local_temp_path) + remote.run(args=['sudo', + 'tar', 'xz', + '-C', remote_dir, + '-f', remote_temp_path]) + remote.run(args=['sudo', 'rm', '-fr', remote_temp_path]) + + +def _nuke_mons(manager, mons, mon_id): + assert mons + is_mon = teuthology.is_type('mon') + for remote, roles in mons.remotes.items(): + for role in roles: + if not is_mon(role): + continue + cluster, _, m = teuthology.split_role(role) + log.info('killing {cluster}:mon.{mon}'.format( + cluster=cluster, + mon=m)) + manager.kill_mon(m) + mon_data = os.path.join('/var/lib/ceph/mon/', + '{0}-{1}'.format(cluster, m)) + if m == mon_id: + # so we will only need to recreate the store.db for the + # first mon, would be 
easier than mkfs on it then replace + # the its store.db with the recovered one + store_dir = os.path.join(mon_data, 'store.db') + remote.run(args=['sudo', 'rm', '-r', store_dir]) + # we need to remove the external_log_to file too, since it + # references a version number inside store.db + remote.run(args=['sudo', 'rm', '-r', os.path.join(mon_data, + 'external_log_to')]) + else: + remote.run(args=['sudo', 'rm', '-r', mon_data]) + + +def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path): + local_mstore = tempfile.mkdtemp() + + # collect the maps from all OSDs + is_osd = teuthology.is_type('osd') + osds = ctx.cluster.only(is_osd) + assert osds + for osd, roles in osds.remotes.items(): + for role in roles: + if not is_osd(role): + continue + cluster, _, osd_id = teuthology.split_role(role) + assert cluster_name == cluster + log.info('collecting maps from {cluster}:osd.{osd}'.format( + cluster=cluster, + osd=osd_id)) + # push leveldb to OSD + osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store') + osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore]) + + _push_directory(local_mstore, osd, osd_mstore) + log.info('rm -rf {0}'.format(local_mstore)) + shutil.rmtree(local_mstore) + # update leveldb with OSD data + options = '--no-mon-config --op update-mon-db --mon-store-path {0}' + log.info('cot {0}'.format(osd_mstore)) + manager.objectstore_tool(pool=None, + options=options.format(osd_mstore), + args='', + osd=osd_id, + do_revive=False) + # pull the updated mon db + log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore)) + local_mstore = tempfile.mkdtemp() + teuthology.pull_directory(osd, osd_mstore, local_mstore) + log.info('rm -rf osd:{0}'.format(osd_mstore)) + osd.run(args=['sudo', 'rm', '-fr', osd_mstore]) + + # recover the first_mon with re-built mon db + # pull from recovered leveldb from client + mon_store_dir = os.path.join('/var/lib/ceph/mon', + '{0}-{1}'.format(cluster_name, mon_id)) + _push_directory(local_mstore, mon, mon_store_dir) + mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir]) + shutil.rmtree(local_mstore) + + # fill up the caps in the keyring file + mon.run(args=['sudo', + 'ceph-authtool', keyring_path, + '-n', 'mon.', + '--cap', 'mon', 'allow *']) + mon.run(args=['sudo', + 'ceph-authtool', keyring_path, + '-n', 'client.admin', + '--cap', 'mon', 'allow *', + '--cap', 'osd', 'allow *', + '--cap', 'mds', 'allow *', + '--cap', 'mgr', 'allow *']) + mon.run(args=['sudo', '-u', 'ceph', + 'CEPH_ARGS=--no-mon-config', + 'ceph-monstore-tool', mon_store_dir, + 'rebuild', '--', + '--keyring', keyring_path, + '--monmap', '/tmp/monmap', + ]) + + +def _revive_mons(manager, mons, recovered, keyring_path): + # revive monitors + # the initial monmap is in the ceph.conf, so we are good. 
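+    # (For reference, the rebuild done in _rebuild_db above boils down to
+    #  running 'ceph-objectstore-tool ... --no-mon-config --op update-mon-db
+    #  --mon-store-path <dir>' against every osd, accumulating the result, and
+    #  then 'ceph-monstore-tool <dir> rebuild -- --keyring <keyring> --monmap
+    #  <monmap>' on the recovered mon; paths here are placeholders.)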
+ n_mons = 0 + is_mon = teuthology.is_type('mon') + for remote, roles in mons.remotes.items(): + for role in roles: + if not is_mon(role): + continue + cluster, _, m = teuthology.split_role(role) + if recovered != m: + log.info('running mkfs on {cluster}:mon.{mon}'.format( + cluster=cluster, + mon=m)) + remote.run( + args=[ + 'sudo', + 'ceph-mon', + '--cluster', cluster, + '--mkfs', + '-i', m, + '--keyring', keyring_path, + '--monmap', '/tmp/monmap']) + log.info('reviving mon.{0}'.format(m)) + manager.revive_mon(m) + n_mons += 1 + manager.wait_for_mon_quorum_size(n_mons, timeout=30) + + +def _revive_mgrs(ctx, manager): + is_mgr = teuthology.is_type('mgr') + mgrs = ctx.cluster.only(is_mgr) + for _, roles in mgrs.remotes.items(): + for role in roles: + if not is_mgr(role): + continue + _, _, mgr_id = teuthology.split_role(role) + log.info('reviving mgr.{0}'.format(mgr_id)) + manager.revive_mgr(mgr_id) + + +def _revive_osds(ctx, manager): + is_osd = teuthology.is_type('osd') + osds = ctx.cluster.only(is_osd) + for _, roles in osds.remotes.items(): + for role in roles: + if not is_osd(role): + continue + _, _, osd_id = teuthology.split_role(role) + log.info('reviving osd.{0}'.format(osd_id)) + manager.revive_osd(osd_id) + + +def task(ctx, config): + """ + Test monitor recovery from OSD + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + # stash a monmap for later + mon.run(args=['ceph', 'mon', 'getmap', '-o', '/tmp/monmap']) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager')) + + mons = ctx.cluster.only(teuthology.is_type('mon')) + # note down the first cluster_name and mon_id + # we will recover it later on + cluster_name, _, mon_id = teuthology.split_role(first_mon) + _nuke_mons(manager, mons, mon_id) + default_keyring = '/etc/ceph/{cluster}.keyring'.format( + cluster=cluster_name) + keyring_path = config.get('keyring_path', default_keyring) + _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path) + _revive_mons(manager, mons, mon_id, keyring_path) + _revive_mgrs(ctx, manager) + _revive_osds(ctx, manager) diff --git a/qa/tasks/reg11184.py b/qa/tasks/reg11184.py new file mode 100644 index 000000000..86cfbf39a --- /dev/null +++ b/qa/tasks/reg11184.py @@ -0,0 +1,242 @@ +""" +Special regression test for tracker #11184 + +Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid)) + +This is accomplished by moving a pg that wasn't part of split and still include +divergent priors. +""" +import logging +import time + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology import misc as teuthology +from tasks.util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries during export / import + to regression test tracker #11184 + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
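+
+    A typical invocation looks like (illustrative):
+
+        tasks:
+        - install:
+        - ceph:
+        - reg11184: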
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + osds = [0, 1, 2] + manager.flush_pg_stats(osds) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + manager.raw_cluster_cmd( + 'osd', 'pool', 'application', 'enable', + 'foo', 'rados', run.Raw('||'), 'true') + + # Remove extra pool to simlify log output + manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it') + + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + 
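+    # 'noup' was set before reviving the divergent osd so that its recovery
+    # delay could be configured first; clearing it lets the osd be marked up
+    # and peer with the rest of the cluster.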
while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + + # Split pgs for pool foo + manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2') + time.sleep(5) + + manager.raw_cluster_cmd('pg','dump') + + # Export a pg + (exp_remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + FSPATH = manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + # Kill one of non-divergent OSDs + log.info('killing osd.%d' % non_divergent[0]) + manager.kill_osd(non_divergent[0]) + manager.mark_down_osd(non_divergent[0]) + # manager.mark_out_osd(non_divergent[0]) + + # An empty collection for pg 2.0 might need to be cleaned up + cmd = ((prefix + "--force --op remove --pgid 2.0"). + format(id=non_divergent[0])) + exp_remote.sh(cmd, wait=True, check_status=False) + + cmd = ((prefix + "--op import --file {file}"). + format(id=non_divergent[0], file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + # bring in our divergent friend and other node + log.info("revive divergent %d", divergent) + manager.revive_osd(divergent) + manager.mark_in_osd(divergent) + log.info("revive %d", non_divergent[0]) + manager.revive_osd(non_divergent[0]) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('delay recovery divergent') + manager.set_config(divergent, osd_recovery_delay_start=100000) + log.info('mark divergent in') + manager.mark_in_osd(divergent) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + time.sleep(3) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + (remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + cmd = 'rm {file}'.format(file=expfile) + remote.run(args=cmd, wait=True) + log.info("success") diff --git a/qa/tasks/rep_lost_unfound_delete.py b/qa/tasks/rep_lost_unfound_delete.py new file mode 100644 index 000000000..8e99ade27 --- /dev/null +++ b/qa/tasks/rep_lost_unfound_delete.py @@ -0,0 +1,179 @@ +""" +Lost_unfound +""" +import logging +import time + +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + 
Test handling of lost objects. + + A pretty rigid cluster is brought up and tested by this task + """ + POOL = 'unfounddel_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... + log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.flush_pg_stats([0, 1]) + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. 
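+    # (osd.2 has been out since before the new objects were written and osd.0
+    # was down while they were written, so osd.1 holds the only copy; losing
+    # it leaves those objects unfound)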
+ manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recovery fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , POOL, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + num_unfound=0 + for o in m['objects']: + if len(o['locations']) == 0: + num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2]) + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.mark_in_osd(1) + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() + diff --git a/qa/tasks/repair_test.py b/qa/tasks/repair_test.py new file mode 100644 index 000000000..cfd6ef791 --- /dev/null +++ b/qa/tasks/repair_test.py @@ -0,0 +1,303 @@ +""" +Test pool repairing after objects are damaged. +""" +import logging +import time + +log = logging.getLogger(__name__) + + +def choose_primary(manager, pool, num): + """ + Return primary to test on. + """ + log.info("Choosing primary") + return manager.get_pg_primary(pool, num) + + +def choose_replica(manager, pool, num): + """ + Return replica to test on. 
+ """ + log.info("Choosing replica") + return manager.get_pg_replica(pool, num) + + +def trunc(manager, osd, pool, obj): + """ + truncate an object + """ + log.info("truncating object") + return manager.osd_admin_socket( + osd, + ['truncobj', pool, obj, '1']) + + +def dataerr(manager, osd, pool, obj): + """ + cause an error in the data + """ + log.info("injecting data err on object") + return manager.osd_admin_socket( + osd, + ['injectdataerr', pool, obj]) + + +def mdataerr(manager, osd, pool, obj): + """ + cause an error in the mdata + """ + log.info("injecting mdata err on object") + return manager.osd_admin_socket( + osd, + ['injectmdataerr', pool, obj]) + + +def omaperr(manager, osd, pool, obj): + """ + Cause an omap error. + """ + log.info("injecting omap err on object") + return manager.osd_admin_socket(osd, ['setomapval', pool, obj, + 'badkey', 'badval']) + + +def repair_test_1(manager, corrupter, chooser, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function (truncate, data-error, or + meta-data error, for example). + :param chooser: osd type chooser (primary or replica) + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_1" + manager.wait_for_clean() + with manager.pool(pool, 1): + + log.info("starting repair test type 1") + victim_osd = chooser(manager, pool, 0) + + # create object + log.info("doing put") + manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(manager, victim_osd, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + log.info("done") + + +def repair_test_2(ctx, manager, config, chooser): + """ + First creates a set of objects and + sets the omap value. It then corrupts an object, does both a scrub + and a deep-scrub, and then corrupts more objects. After that, it + repairs the pool and makes sure that the pool is consistent some + time after a deep-scrub. + + :param chooser: primary or replica selection routine. 
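+
+    The corruption helpers above act through the victim osd's admin socket
+    (truncobj, injectdataerr, injectmdataerr, setomapval); the data/metadata
+    injections only take effect when the cluster runs with
+    'filestore debug inject read err: true', as shown in the task docstring
+    below.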
+ """ + pool = "repair_pool_2" + manager.wait_for_clean() + with manager.pool(pool, 1): + log.info("starting repair test type 2") + victim_osd = chooser(manager, pool, 0) + + # create object + log.info("doing put and setomapval") + manager.do_put(pool, 'file1', '/etc/hosts') + manager.do_rados(['setomapval', 'file1', 'key', 'val'], pool=pool) + manager.do_put(pool, 'file2', '/etc/hosts') + manager.do_put(pool, 'file3', '/etc/hosts') + manager.do_put(pool, 'file4', '/etc/hosts') + manager.do_put(pool, 'file5', '/etc/hosts') + manager.do_rados(['setomapval', 'file5', 'key', 'val'], pool=pool) + manager.do_put(pool, 'file6', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + omaperr(manager, victim_osd, pool, 'file1') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # Regression test for bug #4778, should still + # be inconsistent after scrub + manager.do_pg_scrub(pool, 0, 'scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # Additional corruptions including 2 types for file1 + log.info("corrupting more objects") + dataerr(manager, victim_osd, pool, 'file1') + mdataerr(manager, victim_osd, pool, 'file2') + trunc(manager, victim_osd, pool, 'file3') + omaperr(manager, victim_osd, pool, 'file6') + + # see still inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + # Let repair clear inconsistent flag + time.sleep(10) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + + # In the future repair might determine state of + # inconsistency itself, verify with a deep-scrub + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + + log.info("done") + + +def hinfoerr(manager, victim, pool, obj): + """ + cause an error in the hinfo_key + """ + log.info("remove the hinfo_key") + manager.objectstore_tool(pool, + options='', + args='rm-attr hinfo_key', + object_name=obj, + osd=victim) + + +def repair_test_erasure_code(manager, corrupter, victim, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function. 
+ :param chooser: osd type chooser (primary or replica) + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_3" + manager.wait_for_clean() + with manager.pool(pool_name=pool, pg_num=1, + erasure_code_profile_name='default'): + + log.info("starting repair test for erasure code") + + # create object + log.info("doing put") + manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(manager, victim, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + log.info("done") + + +def task(ctx, config): + """ + Test [deep] repair in several situations: + Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] + + The config should be as follows: + + Must include the log-ignorelist below + Must enable filestore_debug_inject_read_err config + + example: + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: + - 'candidate had a stat error' + - 'candidate had a read error' + - 'deep-scrub 0 missing, 1 inconsistent objects' + - 'deep-scrub 0 missing, 4 inconsistent objects' + - 'deep-scrub [0-9]+ errors' + - '!= omap_digest' + - '!= data_digest' + - 'repair 0 missing, 1 inconsistent objects' + - 'repair 0 missing, 4 inconsistent objects' + - 'repair [0-9]+ errors, [0-9]+ fixed' + - 'scrub 0 missing, 1 inconsistent objects' + - 'scrub [0-9]+ errors' + - 'size 1 != size' + - 'attr name mismatch' + - 'Regular scrub request, deep-scrub details will be lost' + - 'candidate size [0-9]+ info size [0-9]+ mismatch' + conf: + osd: + filestore debug inject read err: true + - repair_test: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'repair_test task only accepts a dict for config' + + manager = ctx.managers['ceph'] + manager.wait_for_all_osds_up() + + manager.raw_cluster_cmd('osd', 'set', 'noscrub') + manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') + + repair_test_1(manager, mdataerr, choose_primary, "scrub") + repair_test_1(manager, mdataerr, choose_replica, "scrub") + repair_test_1(manager, dataerr, choose_primary, "deep-scrub") + repair_test_1(manager, dataerr, choose_replica, "deep-scrub") + repair_test_1(manager, trunc, choose_primary, "scrub") + repair_test_1(manager, trunc, choose_replica, "scrub") + repair_test_2(ctx, manager, config, choose_primary) + repair_test_2(ctx, manager, config, choose_replica) + + repair_test_erasure_code(manager, hinfoerr, 'primary', "deep-scrub") + + manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') diff --git a/qa/tasks/resolve_stuck_peering.py b/qa/tasks/resolve_stuck_peering.py new file mode 100644 index 000000000..d140544c4 --- /dev/null +++ b/qa/tasks/resolve_stuck_peering.py @@ -0,0 +1,112 @@ +""" +Resolve stuck peering +""" +import logging +import time + +from teuthology import misc as teuthology +from tasks.util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling resolve stuck peering + + requires 3 osds on a single test node + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'Resolve stuck peering only accepts a dict for config' 
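+
+    # Outline: write to a one-PG pool with min_size 1, kill the two
+    # non-primary osds, write more objects that only the primary sees, then
+    # kill the primary and revive the others; the PG stays 'down' until the
+    # primary is marked lost, after which it peers and recovers.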
+ + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + + manager.wait_for_clean() + + dummyfile = '/etc/fstab' + dummyfile1 = '/etc/resolv.conf' + + #create 1 PG pool + pool='foo' + log.info('creating pool foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', '%s' % pool, '1') + + #set min_size of the pool to 1 + #so that we can continue with I/O + #when 2 osds are down + manager.set_pool_property(pool, "min_size", 1) + + osds = [0, 1, 2] + + primary = manager.get_pg_primary('foo', 0) + log.info("primary osd is %d", primary) + + others = list(osds) + others.remove(primary) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + #create few objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + #kill other osds except primary + log.info('killing other osds except primary') + for i in others: + manager.kill_osd(i) + for i in others: + manager.mark_down_osd(i) + + + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'new_%d' % i, dummyfile1]) + + #kill primary osd + manager.kill_osd(primary) + manager.mark_down_osd(primary) + + #revive other 2 osds + for i in others: + manager.revive_osd(i) + + #make sure that pg is down + #Assuming pg number for single pg pool will start from 0 + pgnum=0 + pgstr = manager.get_pgid(pool, pgnum) + stats = manager.get_single_pg_stats(pgstr) + print(stats['state']) + + timeout=60 + start=time.time() + + while 'down' not in stats['state']: + assert time.time() - start < timeout, \ + 'failed to reach down state before timeout expired' + stats = manager.get_single_pg_stats(pgstr) + + #mark primary as lost + manager.raw_cluster_cmd('osd', 'lost', '%d' % primary,\ + '--yes-i-really-mean-it') + + + #expect the pg status to be active+undersized+degraded + #pg should recover and become active+clean within timeout + stats = manager.get_single_pg_stats(pgstr) + print(stats['state']) + + timeout=10 + start=time.time() + + while manager.get_num_down(): + assert time.time() - start < timeout, \ + 'failed to recover before timeout expired' + + manager.revive_osd(primary) diff --git a/qa/tasks/rgw.py b/qa/tasks/rgw.py new file mode 100644 index 000000000..61bcea3a5 --- /dev/null +++ b/qa/tasks/rgw.py @@ -0,0 +1,472 @@ +""" +rgw routines +""" +import argparse +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.exceptions import ConfigError +from tasks.ceph_manager import get_valgrind_args +from tasks.util import get_remote_for_role +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from tasks.util.rados import (create_ec_pool, + create_replicated_pool, + create_cache_pool) + +log = logging.getLogger(__name__) + +class RGWEndpoint: + def __init__(self, hostname=None, port=None, cert=None, dns_name=None, website_dns_name=None): + self.hostname = hostname + self.port = port + self.cert = cert + self.dns_name = dns_name + self.website_dns_name = website_dns_name + + def url(self): + proto = 'https' if self.cert else 'http' + return '{proto}://{hostname}:{port}/'.format(proto=proto, hostname=self.hostname, port=self.port) + +@contextlib.contextmanager +def start_rgw(ctx, config, clients): + """ + Start rgw on remote sites. 
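+
+    For each listed client a radosgw process is launched under daemon-helper,
+    wrapped in adjust-ulimits and ceph-coverage (and optionally valgrind), with
+    the frontend, port/SSL settings and any keystone/barbican/vault/pykmip
+    integration taken from the client's config and the role's endpoint.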
+ """ + log.info('Starting rgw...') + testdir = teuthology.get_testdir(ctx) + for client in clients: + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + client_with_cluster = cluster_name + '.' + client_with_id + + client_config = config.get(client) + if client_config is None: + client_config = {} + log.info("rgw %s config is %s", client, client_config) + cmd_prefix = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + ] + + rgw_cmd = ['radosgw'] + + log.info("Using %s as radosgw frontend", ctx.rgw.frontend) + + endpoint = ctx.rgw.role_endpoints[client] + frontends = ctx.rgw.frontend + frontend_prefix = client_config.get('frontend_prefix', None) + if frontend_prefix: + frontends += ' prefix={pfx}'.format(pfx=frontend_prefix) + + if endpoint.cert: + # add the ssl certificate path + frontends += ' ssl_certificate={}'.format(endpoint.cert.certificate) + frontends += ' ssl_port={}'.format(endpoint.port) + else: + frontends += ' port={}'.format(endpoint.port) + + rgw_cmd.extend([ + '--rgw-frontends', frontends, + '-n', client_with_id, + '--cluster', cluster_name, + '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(client_with_cluster=client_with_cluster), + '--log-file', + '/var/log/ceph/rgw.{client_with_cluster}.log'.format(client_with_cluster=client_with_cluster), + '--rgw_ops_log_socket_path', + '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(tdir=testdir, + client_with_cluster=client_with_cluster), + ]) + + keystone_role = client_config.get('use-keystone-role', None) + if keystone_role is not None: + if not ctx.keystone: + raise ConfigError('rgw must run after the keystone task') + url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(host=endpoint.hostname, + port=endpoint.port) + ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url) + + keystone_host, keystone_port = \ + ctx.keystone.public_endpoints[keystone_role] + rgw_cmd.extend([ + '--rgw_keystone_url', + 'http://{khost}:{kport}'.format(khost=keystone_host, + kport=keystone_port), + ]) + + + if client_config.get('dns-name') is not None: + rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name]) + if client_config.get('dns-s3website-name') is not None: + rgw_cmd.extend(['--rgw-dns-s3website-name', endpoint.website_dns_name]) + + + vault_role = client_config.get('use-vault-role', None) + barbican_role = client_config.get('use-barbican-role', None) + pykmip_role = client_config.get('use-pykmip-role', None) + + token_path = '/etc/ceph/vault-root-token' + if barbican_role is not None: + if not hasattr(ctx, 'barbican'): + raise ConfigError('rgw must run after the barbican task') + + barbican_host, barbican_port = \ + ctx.barbican.endpoints[barbican_role] + log.info("Use barbican url=%s:%s", barbican_host, barbican_port) + + rgw_cmd.extend([ + '--rgw_barbican_url', + 'http://{bhost}:{bport}'.format(bhost=barbican_host, + bport=barbican_port), + ]) + elif vault_role is not None: + if not ctx.vault.root_token: + raise ConfigError('vault: no "root_token" specified') + # create token on file + ctx.rgw.vault_role = vault_role + ctx.cluster.only(client).run(args=['sudo', 'echo', '-n', ctx.vault.root_token, run.Raw('|'), 'sudo', 'tee', token_path]) + log.info("Token file content") + ctx.cluster.only(client).run(args=['cat', token_path]) + log.info("Restrict access to token file") + ctx.cluster.only(client).run(args=['sudo', 
'chmod', '600', token_path]) + ctx.cluster.only(client).run(args=['sudo', 'chown', 'ceph', token_path]) + + vault_addr = "{}:{}".format(*ctx.vault.endpoints[vault_role]) + rgw_cmd.extend([ + '--rgw_crypt_vault_addr', vault_addr, + '--rgw_crypt_vault_token_file', token_path, + '--rgw_crypt_sse_s3_vault_addr', vault_addr, + '--rgw_crypt_sse_s3_vault_token_file', token_path, + ]) + elif pykmip_role is not None: + if not hasattr(ctx, 'pykmip'): + raise ConfigError('rgw must run after the pykmip task') + ctx.rgw.pykmip_role = pykmip_role + rgw_cmd.extend([ + '--rgw_crypt_kmip_addr', "{}:{}".format(*ctx.pykmip.endpoints[pykmip_role]), + ]) + + clientcert = ctx.ssl_certificates.get('kmip-client') + servercert = ctx.ssl_certificates.get('kmip-server') + clientca = ctx.ssl_certificates.get('kmiproot') + + clientkey = clientcert.key + clientcert = clientcert.certificate + serverkey = servercert.key + servercert = servercert.certificate + rootkey = clientca.key + rootcert = clientca.certificate + + cert_path = '/etc/ceph/' + ctx.cluster.only(client).run(args=['sudo', 'cp', clientcert, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', clientkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', servercert, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', serverkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', rootkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', rootcert, cert_path]) + + clientcert = cert_path + 'kmip-client.crt' + clientkey = cert_path + 'kmip-client.key' + servercert = cert_path + 'kmip-server.crt' + serverkey = cert_path + 'kmip-server.key' + rootkey = cert_path + 'kmiproot.key' + rootcert = cert_path + 'kmiproot.crt' + + ctx.cluster.only(client).run(args=['sudo', 'chmod', '600', clientcert, clientkey, servercert, serverkey, rootkey, rootcert]) + ctx.cluster.only(client).run(args=['sudo', 'chown', 'ceph', clientcert, clientkey, servercert, serverkey, rootkey, rootcert]) + + rgw_cmd.extend([ + '--foreground', + run.Raw('|'), + 'sudo', + 'tee', + '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(client_with_cluster=client_with_cluster), + run.Raw('2>&1'), + ]) + + if client_config.get('valgrind'): + cmd_prefix = get_valgrind_args( + testdir, + client_with_cluster, + cmd_prefix, + client_config.get('valgrind'), + # see https://github.com/ceph/teuthology/pull/1600 + exit_on_first_error=False + ) + + run_cmd = list(cmd_prefix) + run_cmd.extend(rgw_cmd) + + ctx.daemons.add_daemon( + remote, 'rgw', client_with_id, + cluster=cluster_name, + fsid=ctx.ceph[cluster_name].fsid, + args=run_cmd, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + + # XXX: add_daemon() doesn't let us wait until radosgw finishes startup + for client in clients: + endpoint = ctx.rgw.role_endpoints[client] + url = endpoint.url() + log.info('Polling {client} until it starts accepting connections on {url}'.format(client=client, url=url)) + (remote,) = ctx.cluster.only(client).remotes.keys() + wait_for_radosgw(url, remote) + + try: + yield + finally: + for client in clients: + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + client_with_cluster = cluster_name + '.' 
+ client_with_id + ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() + ctx.cluster.only(client).run( + args=[ + 'rm', + '-f', + '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, + client=client_with_cluster), + ], + ) + ctx.cluster.only(client).run(args=['sudo', 'rm', '-f', token_path]) + +def assign_endpoints(ctx, config, default_cert): + role_endpoints = {} + for role, client_config in config.items(): + client_config = client_config or {} + remote = get_remote_for_role(ctx, role) + + cert = client_config.get('ssl certificate', default_cert) + if cert: + # find the certificate created by the ssl task + if not hasattr(ctx, 'ssl_certificates'): + raise ConfigError('rgw: no ssl task found for option "ssl certificate"') + ssl_certificate = ctx.ssl_certificates.get(cert, None) + if not ssl_certificate: + raise ConfigError('rgw: missing ssl certificate "{}"'.format(cert)) + else: + ssl_certificate = None + + port = client_config.get('port', 443 if ssl_certificate else 80) + + # if dns-name is given, use it as the hostname (or as a prefix) + dns_name = client_config.get('dns-name', '') + if len(dns_name) == 0 or dns_name.endswith('.'): + dns_name += remote.hostname + + website_dns_name = client_config.get('dns-s3website-name') + if website_dns_name is not None and (len(website_dns_name) == 0 or website_dns_name.endswith('.')): + website_dns_name += remote.hostname + + role_endpoints[role] = RGWEndpoint(remote.hostname, port, ssl_certificate, dns_name, website_dns_name) + + return role_endpoints + +@contextlib.contextmanager +def create_pools(ctx, clients): + """Create replicated or erasure coded data pools for rgw.""" + + log.info('Creating data pools') + for client in clients: + log.debug("Obtaining remote for client {}".format(client)) + (remote,) = ctx.cluster.only(client).remotes.keys() + data_pool = 'default.rgw.buckets.data' + cluster_name, daemon_type, client_id = teuthology.split_role(client) + + if ctx.rgw.ec_data_pool: + create_ec_pool(remote, data_pool, client, ctx.rgw.data_pool_pg_size, + ctx.rgw.erasure_code_profile, cluster_name, 'rgw') + else: + create_replicated_pool(remote, data_pool, ctx.rgw.data_pool_pg_size, cluster_name, 'rgw') + + index_pool = 'default.rgw.buckets.index' + create_replicated_pool(remote, index_pool, ctx.rgw.index_pool_pg_size, cluster_name, 'rgw') + + if ctx.rgw.cache_pools: + create_cache_pool(remote, data_pool, data_pool + '.cache', 64, + 64*1024*1024, cluster_name) + log.debug('Pools created') + yield + +@contextlib.contextmanager +def configure_compression(ctx, clients, compression): + """ set a compression type in the default zone placement """ + log.info('Configuring compression type = %s', compression) + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). + # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'modify', '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--compression', compression], + check_status=True) + yield + +@contextlib.contextmanager +def disable_inline_data(ctx, clients): + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). 
+ # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'modify', '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--placement-inline-data', 'false'], + check_status=True) + yield + +@contextlib.contextmanager +def configure_datacache(ctx, clients, datacache_path): + """ create directory for rgw datacache """ + log.info('Preparing directory for rgw datacache at %s', datacache_path) + for client in clients: + if(datacache_path != None): + ctx.cluster.only(client).run(args=['mkdir', '-p', datacache_path]) + ctx.cluster.only(client).run(args=['sudo', 'chmod', 'a+rwx', datacache_path]) + else: + log.info('path for datacache was not provided') + yield + +@contextlib.contextmanager +def configure_storage_classes(ctx, clients, storage_classes): + """ set a compression type in the default zone placement """ + + sc = [s.strip() for s in storage_classes.split(',')] + + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). + # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + for storage_class in sc: + log.info('Configuring storage class type = %s', storage_class) + rgwadmin(ctx, client, + cmd=['zonegroup', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', storage_class], + check_status=True) + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', storage_class, + '--data-pool', 'default.rgw.buckets.data.' + storage_class.lower()], + check_status=True) + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + For example, to run rgw on all clients:: + + tasks: + - ceph: + - rgw: + + To only run on certain clients:: + + tasks: + - ceph: + - rgw: [client.0, client.3] + + or + + tasks: + - ceph: + - rgw: + client.0: + client.3: + + To run radosgw through valgrind: + + tasks: + - ceph: + - rgw: + client.0: + valgrind: [--tool=memcheck] + client.3: + valgrind: [--tool=memcheck] + + To configure data or index pool pg_size: + + overrides: + rgw: + data_pool_pg_size: 256 + index_pool_pg_size: 128 + """ + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type( + ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + clients = config.keys() # http://tracker.ceph.com/issues/20417 + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('rgw', {})) + + ctx.rgw = argparse.Namespace() + ctx.rgw_cloudtier = None + + ctx.rgw.ec_data_pool = bool(config.pop('ec-data-pool', False)) + ctx.rgw.erasure_code_profile = config.pop('erasure_code_profile', {}) + ctx.rgw.cache_pools = bool(config.pop('cache-pools', False)) + ctx.rgw.frontend = config.pop('frontend', 'beast') + ctx.rgw.compression_type = config.pop('compression type', None) + ctx.rgw.inline_data = config.pop('inline data', True) + ctx.rgw.storage_classes = config.pop('storage classes', None) + default_cert = config.pop('ssl certificate', None) + ctx.rgw.data_pool_pg_size = config.pop('data_pool_pg_size', 64) + ctx.rgw.index_pool_pg_size = config.pop('index_pool_pg_size', 64) + ctx.rgw.datacache = bool(config.pop('datacache', False)) + ctx.rgw.datacache_path = 
config.pop('datacache_path', None) + ctx.rgw.config = config + + log.debug("config is {}".format(config)) + log.debug("client list is {}".format(clients)) + + ctx.rgw.role_endpoints = assign_endpoints(ctx, config, default_cert) + + subtasks = [ + lambda: create_pools(ctx=ctx, clients=clients), + ] + if ctx.rgw.compression_type: + subtasks.extend([ + lambda: configure_compression(ctx=ctx, clients=clients, + compression=ctx.rgw.compression_type), + ]) + if not ctx.rgw.inline_data: + subtasks.extend([ + lambda: disable_inline_data(ctx=ctx, clients=clients), + ]) + if ctx.rgw.datacache: + subtasks.extend([ + lambda: configure_datacache(ctx=ctx, clients=clients, + datacache_path=ctx.rgw.datacache_path), + ]) + if ctx.rgw.storage_classes: + subtasks.extend([ + lambda: configure_storage_classes(ctx=ctx, clients=clients, + storage_classes=ctx.rgw.storage_classes), + ]) + subtasks.extend([ + lambda: start_rgw(ctx=ctx, config=config, clients=clients), + ]) + + with contextutil.nested(*subtasks): + yield diff --git a/qa/tasks/rgw_cloudtier.py b/qa/tasks/rgw_cloudtier.py new file mode 100644 index 000000000..88f7d0dc2 --- /dev/null +++ b/qa/tasks/rgw_cloudtier.py @@ -0,0 +1,122 @@ +""" +rgw_cloudtier configuration routines +""" +import argparse +import logging + +from teuthology import misc as teuthology +from teuthology.exceptions import ConfigError +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from teuthology.task import Task + +log = logging.getLogger(__name__) + +class RGWCloudTier(Task): + """ + Configure CloudTier storage class. + + To configure cloudtiering on any client:: + + tasks: + - ceph: + - rgw: + - rgw-cloudtier: + client.0: + cloud_storage_class: + cloud_client: + cloud_regular_storage_class: + cloud_target_storage_class: + cloud_retain_head_object: + cloud_target_path: + cloudtier_user: + cloud_secret: + cloud_access_key: + + """ + def __init__(self, ctx, config): + super(RGWCloudTier, self).__init__(ctx, config) + + def setup(self): + super(RGWCloudTier, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + teuthology.deep_merge(self.config, overrides.get('rgw-cloudtier', {})) + + if not self.ctx.rgw: + raise ConfigError('rgw-cloudtier must run after the rgw task') + + self.ctx.rgw_cloudtier = argparse.Namespace() + self.ctx.rgw_cloudtier.config = self.config + + log.info('Configuring rgw cloudtier ...') + clients = self.config.keys() # http://tracker.ceph.com/issues/20417 + for client in clients: + client_config = self.config.get(client) + if client_config is None: + client_config = {} + + if client_config is not None: + log.info('client %s - cloudtier config is -----------------%s ', client, client_config) + # configuring cloudtier + + cloud_client = client_config.get('cloud_client') + cloud_storage_class = client_config.get('cloud_storage_class') + cloud_target_path = client_config.get('cloud_target_path') + cloud_target_storage_class = client_config.get('cloud_target_storage_class') + cloud_retain_head_object = client_config.get('cloud_retain_head_object') + + cloudtier_user = client_config.get('cloudtier_user') + cloud_access_key = cloudtier_user.get('cloud_access_key') + cloud_secret = cloudtier_user.get('cloud_secret') + + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). 
+ # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(self.ctx, client, cmd=['user', 'list'], check_status=True) + + endpoint = self.ctx.rgw.role_endpoints[cloud_client] + + # create cloudtier storage class + tier_config_params = "endpoint=" + endpoint.url() + \ + ",access_key=" + cloud_access_key + \ + ",secret=" + cloud_secret + \ + ",retain_head_object=" + cloud_retain_head_object + + if (cloud_target_path != None): + tier_config_params += ",target_path=" + cloud_target_path + if (cloud_target_storage_class != None): + tier_config_params += ",target_storage_class=" + cloud_target_storage_class + + log.info('Configuring cloud-s3 tier storage class type = %s', cloud_storage_class) + + rgwadmin(self.ctx, client, + cmd=['zonegroup', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', cloud_storage_class, + '--tier-type', 'cloud-s3', + '--tier-config', tier_config_params], + check_status=True) + + ## create cloudtier user with the access keys given on the cloud client + cloud_tier_user_id = "cloud-tier-user-" + cloud_client + cloud_tier_user_name = "CLOUD TIER USER - " + cloud_client + rgwadmin(self.ctx, cloud_client, + cmd=['user', 'create', '--uid', cloud_tier_user_id, + '--display-name', cloud_tier_user_name, + '--access-key', cloud_access_key, + '--secret', cloud_secret, + '--caps', 'user-policy=*'], + check_status=True) + + log.info('Finished Configuring rgw cloudtier ...') + + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + self.ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).restart() + log.info('restarted rgw daemon ...') + + (remote,) = self.ctx.cluster.only(client).remotes.keys() + wait_for_radosgw(endpoint.url(), remote) + + +task = RGWCloudTier diff --git a/qa/tasks/rgw_logsocket.py b/qa/tasks/rgw_logsocket.py new file mode 100644 index 000000000..d76e59d7f --- /dev/null +++ b/qa/tasks/rgw_logsocket.py @@ -0,0 +1,165 @@ +""" +rgw s3tests logging wrappers +""" +from io import BytesIO +from configobj import ConfigObj +import contextlib +import logging +from tasks import s3tests + +from teuthology import misc as teuthology +from teuthology import contextutil + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Run s3tests download function + """ + return s3tests.download(ctx, config) + +def _config_user(s3tests_conf, section, user): + """ + Run s3tests user config function + """ + return s3tests._config_user(s3tests_conf, section, user) + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Run s3tests user create function + """ + return s3tests.create_users(ctx, config) + +@contextlib.contextmanager +def configure(ctx, config): + """ + Run s3tests user configure function + """ + return s3tests.configure(ctx, config) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run remote netcat tests + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + client_config['extra_args'] = [ + 's3tests.functional.test_s3:test_bucket_list_return_data', + ] +# args = [ +# 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), +# '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir), +# '-w', +# '{tdir}/s3-tests'.format(tdir=testdir), +# '-v', +# 's3tests.functional.test_s3:test_bucket_list_return_data', +# ] +# if client_config is not None and 
'extra_args' in client_config: +# args.extend(client_config['extra_args']) +# +# ctx.cluster.only(client).run( +# args=args, +# ) + + s3tests.run_tests(ctx, config) + + netcat_out = BytesIO() + + for client, client_config in config.items(): + ctx.cluster.only(client).run( + args = [ + 'netcat', + '-w', '5', + '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir), + ], + stdout = netcat_out, + ) + + out = netcat_out.getvalue() + + assert len(out) > 100 + + log.info('Received', out) + + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run some s3-tests suite against rgw, verify opslog socket returns data + + Must restrict testing to a particular client:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: [client.0] + + To pass extra arguments to nose (e.g. to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert hasattr(ctx, 'rgw'), 'rgw-logsocket must run after the rgw task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task rgw-logsocket only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for (client, cconf) in config.items(): + teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {})) + + log.debug('config is %s', config) + + s3tests_conf = {} + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'rgw-logsocket: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/rgw_module.py b/qa/tasks/rgw_module.py new file mode 100644 index 000000000..0d2ca9094 --- /dev/null +++ b/qa/tasks/rgw_module.py @@ -0,0 +1,53 @@ +import logging +import yaml + +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs): + teuthology.get_testdir(ctx) + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + '-c', '/etc/ceph/{}.conf'.format(cluster_name), + '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + '--fsid', ctx.ceph[cluster_name].fsid, + ] + extra_cephadm_args + [ + '--', + ] + args, + **kwargs + ) + + +def apply(ctx, config): + """ + Apply spec + + tasks: + - rgw_module.apply: + specs: + - rgw_realm: myrealm1 + rgw_zonegroup: myzonegroup1 + rgw_zone: myzone1 + placement: + hosts: + - ceph-node-0 + - ceph-node-1 + spec: + rgw_frontend_port: 5500 + """ + cluster_name = config.get('cluster', 'ceph') + specs = config.get('specs', []) + y = yaml.dump_all(specs) + 
log.info(f'Applying spec(s):\n{y}') + _shell( + ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'rgw', 'realm', 'bootstrap', '-i', '-'], + stdin=y, + ) diff --git a/qa/tasks/rgw_multi b/qa/tasks/rgw_multi new file mode 120000 index 000000000..abfc703b9 --- /dev/null +++ b/qa/tasks/rgw_multi @@ -0,0 +1 @@ +../../src/test/rgw/rgw_multi
\ No newline at end of file diff --git a/qa/tasks/rgw_multisite.py b/qa/tasks/rgw_multisite.py new file mode 100644 index 000000000..f5a6f5a26 --- /dev/null +++ b/qa/tasks/rgw_multisite.py @@ -0,0 +1,428 @@ +""" +rgw multisite configuration routines +""" +import argparse +import logging +import random +import string +from copy import deepcopy +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from tasks.util.rados import create_ec_pool, create_replicated_pool +from tasks.rgw_multi import multisite +from tasks.rgw_multi.zone_rados import RadosZone as RadosZone + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.task import Task + +log = logging.getLogger(__name__) + +class RGWMultisite(Task): + """ + Performs rgw multisite configuration to match the given realm definition. + + - rgw-multisite: + realm: + name: test-realm + is_default: true + + List one or more zonegroup definitions. These are provided as json + input to `radosgw-admin zonegroup set`, with the exception of these keys: + + * 'is_master' is passed on the command line as --master + * 'is_default' is passed on the command line as --default + * 'endpoints' given as client names are replaced with actual endpoints + + zonegroups: + - name: test-zonegroup + api_name: test-api + is_master: true + is_default: true + endpoints: [c1.client.0] + + List each of the zones to be created in this zonegroup. + + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + + A complete example: + + tasks: + - install: + - ceph: {cluster: c1} + - ceph: {cluster: c2} + - rgw: + c1.client.0: + c2.client.0: + - rgw-multisite: + realm: + name: test-realm + is_default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + + """ + def __init__(self, ctx, config): + super(RGWMultisite, self).__init__(ctx, config) + + def setup(self): + super(RGWMultisite, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('rgw-multisite', {})) + + if not self.ctx.rgw: + raise ConfigError('rgw-multisite must run after the rgw task') + role_endpoints = self.ctx.rgw.role_endpoints + + # construct Clusters and Gateways for each client in the rgw task + clusters, gateways = extract_clusters_and_gateways(self.ctx, + role_endpoints) + + # get the master zone and zonegroup configuration + mz, mzg = extract_master_zone_zonegroup(self.config['zonegroups']) + cluster1 = cluster_for_zone(clusters, mz) + + # create the realm and period on the master zone's cluster + log.info('creating realm..') + realm = create_realm(cluster1, self.config['realm']) + period = realm.current_period + + creds = gen_credentials() + + # create the master zonegroup and its master zone + log.info('creating master zonegroup..') + master_zonegroup = create_zonegroup(cluster1, gateways, period, + deepcopy(mzg)) + period.master_zonegroup = master_zonegroup + + log.info('creating master zone..') + master_zone = create_zone(self.ctx, cluster1, gateways, creds, + master_zonegroup, deepcopy(mz)) + master_zonegroup.master_zone = master_zone + + period.update(master_zone, commit=True) + restart_zone_gateways(master_zone) # restart with --rgw-zone + + # create the admin user on the 
master zone + log.info('creating admin user..') + user_args = ['--display-name', 'Realm Admin', '--system'] + user_args += creds.credential_args() + admin_user = multisite.User('realm-admin') + admin_user.create(master_zone, user_args) + + # process 'zonegroups' + for zg_config in self.config['zonegroups']: + zones_config = zg_config.pop('zones') + + zonegroup = None + for zone_config in zones_config: + # get the cluster for this zone + cluster = cluster_for_zone(clusters, zone_config) + + if cluster != cluster1: # already created on master cluster + log.info('pulling realm configuration to %s', cluster.name) + realm.pull(cluster, master_zone.gateways[0], creds) + + # use the first zone's cluster to create the zonegroup + if not zonegroup: + if zg_config['name'] == master_zonegroup.name: + zonegroup = master_zonegroup + else: + log.info('creating zonegroup..') + zonegroup = create_zonegroup(cluster, gateways, + period, zg_config) + + if zone_config['name'] == master_zone.name: + # master zone was already created + zone = master_zone + else: + # create the zone and commit the period + log.info('creating zone..') + zone = create_zone(self.ctx, cluster, gateways, creds, + zonegroup, zone_config) + period.update(zone, commit=True) + + restart_zone_gateways(zone) # restart with --rgw-zone + + # attach configuration to the ctx for other tasks + self.ctx.rgw_multisite = argparse.Namespace() + self.ctx.rgw_multisite.clusters = clusters + self.ctx.rgw_multisite.gateways = gateways + self.ctx.rgw_multisite.realm = realm + self.ctx.rgw_multisite.admin_user = admin_user + + log.info('rgw multisite configuration completed') + + def end(self): + del self.ctx.rgw_multisite + +class Cluster(multisite.Cluster): + """ Issues 'radosgw-admin' commands with the rgwadmin() helper """ + def __init__(self, ctx, name, client): + super(Cluster, self).__init__() + self.ctx = ctx + self.name = name + self.client = client + + def admin(self, args = None, **kwargs): + """ radosgw-admin command """ + args = args or [] + args += ['--cluster', self.name] + args += ['--debug-rgw', str(kwargs.pop('debug_rgw', 0))] + args += ['--debug-ms', str(kwargs.pop('debug_ms', 0))] + if kwargs.pop('read_only', False): + args += ['--rgw-cache-enabled', 'false'] + kwargs['decode'] = False + check_retcode = kwargs.pop('check_retcode', True) + r, s = rgwadmin(self.ctx, self.client, args, **kwargs) + if check_retcode: + assert r == 0 + return s, r + +class Gateway(multisite.Gateway): + """ Controls a radosgw instance using its daemon """ + def __init__(self, role, remote, daemon, *args, **kwargs): + super(Gateway, self).__init__(*args, **kwargs) + self.role = role + self.remote = remote + self.daemon = daemon + + def set_zone(self, zone): + """ set the zone and add its args to the daemon's command line """ + assert self.zone is None, 'zone can only be set once' + self.zone = zone + # daemon.restart_with_args() would be perfect for this, except that + # radosgw args likely include a pipe and redirect. 
zone arguments at + # the end won't actually apply to radosgw + args = self.daemon.command_kwargs.get('args', []) + try: + # insert zone args before the first | + pipe = args.index(run.Raw('|')) + args = args[0:pipe] + zone.zone_args() + args[pipe:] + except ValueError: + args += zone.zone_args() + self.daemon.command_kwargs['args'] = args + + def start(self, args = None): + """ (re)start the daemon """ + self.daemon.restart() + # wait until startup completes + wait_for_radosgw(self.endpoint(), self.remote) + + def stop(self): + """ stop the daemon """ + self.daemon.stop() + +def extract_clusters_and_gateways(ctx, role_endpoints): + """ create cluster and gateway instances for all of the radosgw roles """ + clusters = {} + gateways = {} + for role, endpoint in role_endpoints.items(): + cluster_name, daemon_type, client_id = misc.split_role(role) + # find or create the cluster by name + cluster = clusters.get(cluster_name) + if not cluster: + clusters[cluster_name] = cluster = Cluster(ctx, cluster_name, role) + # create a gateway for this daemon + client_with_id = daemon_type + '.' + client_id # match format from rgw.py + daemon = ctx.daemons.get_daemon('rgw', client_with_id, cluster_name) + if not daemon: + raise ConfigError('no daemon for role=%s cluster=%s type=rgw id=%s' % \ + (role, cluster_name, client_id)) + (remote,) = ctx.cluster.only(role).remotes.keys() + gateways[role] = Gateway(role, remote, daemon, endpoint.hostname, + endpoint.port, cluster) + return clusters, gateways + +def create_realm(cluster, config): + """ create a realm from configuration and initialize its first period """ + realm = multisite.Realm(config['name']) + args = [] + if config.get('is_default', False): + args += ['--default'] + realm.create(cluster, args) + realm.current_period = multisite.Period(realm) + return realm + +def extract_user_credentials(config): + """ extract keys from configuration """ + return multisite.Credentials(config['access_key'], config['secret_key']) + +def extract_master_zone(zonegroup_config): + """ find and return the master zone definition """ + master = None + for zone in zonegroup_config['zones']: + if not zone.get('is_master', False): + continue + if master: + raise ConfigError('zones %s and %s cannot both set \'is_master\'' % \ + (master['name'], zone['name'])) + master = zone + # continue the loop so we can detect duplicates + if not master: + raise ConfigError('one zone must set \'is_master\' in zonegroup %s' % \ + zonegroup_config['name']) + return master + +def extract_master_zone_zonegroup(zonegroups_config): + """ find and return the master zone and zonegroup definitions """ + master_zone, master_zonegroup = (None, None) + for zonegroup in zonegroups_config: + # verify that all zonegroups have a master zone set, even if they + # aren't in the master zonegroup + zone = extract_master_zone(zonegroup) + if not zonegroup.get('is_master', False): + continue + if master_zonegroup: + raise ConfigError('zonegroups %s and %s cannot both set \'is_master\'' % \ + (master_zonegroup['name'], zonegroup['name'])) + master_zonegroup = zonegroup + master_zone = zone + # continue the loop so we can detect duplicates + if not master_zonegroup: + raise ConfigError('one zonegroup must set \'is_master\'') + return master_zone, master_zonegroup + +def extract_zone_cluster_name(zone_config): + """ return the cluster (must be common to all zone endpoints) """ + cluster_name = None + endpoints = zone_config.get('endpoints') + if not endpoints: + raise ConfigError('zone %s missing \'endpoints\' 
list' % \ + zone_config['name']) + for role in endpoints: + name, _, _ = misc.split_role(role) + if not cluster_name: + cluster_name = name + elif cluster_name != name: + raise ConfigError('all zone %s endpoints must be in the same cluster' % \ + zone_config['name']) + return cluster_name + +def cluster_for_zone(clusters, zone_config): + """ return the cluster entry for the given zone """ + name = extract_zone_cluster_name(zone_config) + try: + return clusters[name] + except KeyError: + raise ConfigError('no cluster %s found' % name) + +def gen_access_key(): + return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(16)) + +def gen_secret(): + return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(32)) + +def gen_credentials(): + return multisite.Credentials(gen_access_key(), gen_secret()) + +def extract_gateway_endpoints(gateways, endpoints_config): + """ return a list of gateway endpoints associated with the given roles """ + endpoints = [] + for role in endpoints_config: + try: + # replace role names with their gateway's endpoint + endpoints.append(gateways[role].endpoint()) + except KeyError: + raise ConfigError('no radosgw endpoint found for role %s' % role) + return endpoints + +def is_default_arg(config): + return ['--default'] if config.pop('is_default', False) else [] + +def is_master_arg(config): + return ['--master'] if config.pop('is_master', False) else [] + +def create_zonegroup(cluster, gateways, period, config): + """ pass the zonegroup configuration to `zonegroup set` """ + config.pop('zones', None) # remove 'zones' from input to `zonegroup set` + endpoints = config.get('endpoints') + if endpoints: + # replace client names with their gateway endpoints + config['endpoints'] = extract_gateway_endpoints(gateways, endpoints) + zonegroup = multisite.ZoneGroup(config['name'], period) + # `zonegroup set` needs --default on command line, and 'is_master' in json + args = is_default_arg(config) + zonegroup.set(cluster, config, args) + period.zonegroups.append(zonegroup) + return zonegroup + +def create_zone(ctx, cluster, gateways, creds, zonegroup, config): + """ create a zone with the given configuration """ + zone = multisite.Zone(config['name'], zonegroup, cluster) + zone = RadosZone(config['name'], zonegroup, cluster) + + # collect Gateways for the zone's endpoints + endpoints = config.get('endpoints') + if not endpoints: + raise ConfigError('no \'endpoints\' for zone %s' % config['name']) + zone.gateways = [gateways[role] for role in endpoints] + for gateway in zone.gateways: + gateway.set_zone(zone) + + # format the gateway endpoints + endpoints = [g.endpoint() for g in zone.gateways] + + args = is_default_arg(config) + args += is_master_arg(config) + args += creds.credential_args() + if len(endpoints): + args += ['--endpoints', ','.join(endpoints)] + zone.create(cluster, args) + zonegroup.zones.append(zone) + + create_zone_pools(ctx, zone) + if ctx.rgw.compression_type: + configure_zone_compression(zone, ctx.rgw.compression_type) + + zonegroup.zones_by_type.setdefault(zone.tier_type(), []).append(zone) + + if zone.is_read_only(): + zonegroup.ro_zones.append(zone) + else: + zonegroup.rw_zones.append(zone) + + return zone + +def create_zone_pools(ctx, zone): + """ Create the data_pool for each placement type """ + gateway = zone.gateways[0] + cluster = zone.cluster + for pool_config in zone.data.get('placement_pools', []): + pool_name = pool_config['val']['storage_classes']['STANDARD']['data_pool'] 
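
The loop above reads the zone's placement configuration, as reported by `radosgw-admin zone get`, and pulls out the STANDARD data pool for each placement target. A rough sketch of the structure being indexed (field names follow the zone JSON; the pool names here are invented for illustration):

    # Indicative shape only; fields mirror `radosgw-admin zone get` output
    # and the pool names are placeholders.
    placement_pools = [
        {
            'key': 'default-placement',
            'val': {
                'index_pool': 'test-zone1.rgw.buckets.index',
                'storage_classes': {
                    'STANDARD': {'data_pool': 'test-zone1.rgw.buckets.data'},
                },
            },
        },
    ]
    data_pools = [p['val']['storage_classes']['STANDARD']['data_pool']
                  for p in placement_pools]
    assert data_pools == ['test-zone1.rgw.buckets.data']
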
+ if ctx.rgw.ec_data_pool: + create_ec_pool(gateway.remote, pool_name, zone.name, 64, + ctx.rgw.erasure_code_profile, cluster.name, 'rgw') + else: + create_replicated_pool(gateway.remote, pool_name, 64, cluster.name, 'rgw') + +def configure_zone_compression(zone, compression): + """ Set compression type in the zone's default-placement """ + zone.json_command(zone.cluster, 'placement', ['modify', + '--placement-id', 'default-placement', + '--compression', compression + ]) + +def restart_zone_gateways(zone): + zone.stop() + zone.start() + +task = RGWMultisite diff --git a/qa/tasks/rgw_multisite_tests.py b/qa/tasks/rgw_multisite_tests.py new file mode 100644 index 000000000..888a37181 --- /dev/null +++ b/qa/tasks/rgw_multisite_tests.py @@ -0,0 +1,120 @@ +""" +rgw multisite testing +""" +import importlib.util +import logging +import nose.core +import nose.config +import sys + +from nose.plugins.manager import DefaultPluginManager +from teuthology.config import config as teuth_config +from teuthology.exceptions import ConfigError +from teuthology.repo_utils import fetch_repo +from teuthology.task import Task +from teuthology import misc + +log = logging.getLogger(__name__) + + +class RGWMultisiteTests(Task): + """ + Runs the rgw_multi tests against a multisite configuration created by the + rgw-multisite task. Tests are run with nose, using any additional 'args' + provided. Overrides for tests.Config can be set in 'config'. The 'branch' + and 'repo' can be overridden to clone the rgw_multi tests from another + release. + + - rgw-multisite-tests: + args: + - tests.py:test_object_sync + config: + reconfigure_delay: 60 + branch: octopus + repo: https://github.com/ceph/ceph.git + + """ + def __init__(self, ctx, config): + super(RGWMultisiteTests, self).__init__(ctx, config) + + def setup(self): + super(RGWMultisiteTests, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('rgw-multisite-tests', {})) + + if not self.ctx.rgw_multisite: + raise ConfigError('rgw-multisite-tests must run after the rgw-multisite task') + realm = self.ctx.rgw_multisite.realm + master_zone = realm.meta_master_zone() + + branch = self.config.get('branch') + if not branch: + # run from suite_path + suite_path = self.ctx.config.get('suite_path') + self.module_path = suite_path + '/../src/test/rgw/rgw_multi' + else: + # clone the qa branch + repo = self.config.get('repo', teuth_config.get_ceph_qa_suite_git_url()) + log.info("cloning suite branch %s from %s...", branch, repo) + clonedir = fetch_repo(repo, branch) + # import its version of rgw_multi + self.module_path = clonedir + '/src/test/rgw/rgw_multi' + + log.info("importing tests from %s", self.module_path) + spec = importlib.util.spec_from_file_location('rgw_multi', self.module_path + '/__init__.py') + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + + from rgw_multi import multisite, tests + + # create the test user + log.info('creating test user..') + user = multisite.User('rgw-multisite-test-user') + user.create(master_zone, ['--display-name', 'Multisite Test User', + '--gen-access-key', '--gen-secret', '--caps', 'roles=*']) + + config = self.config.get('config', {}) + tests.init_multi(realm, user, tests.Config(**config)) + tests.realm_meta_checkpoint(realm) + + def begin(self): + # extra arguments for nose can be passed as a string or list + extra_args = self.config.get('args', []) + if not isinstance(extra_args, list): + extra_args = 
[extra_args] + argv = [__name__] + extra_args + + log.info("running rgw multisite tests on '%s' with args=%r", + self.module_path, extra_args) + + # run nose tests in the module path + conf = nose.config.Config(stream=get_log_stream(), verbosity=2, workingDir=self.module_path) + conf.plugins = DefaultPluginManager() # overrides default = NoPlugins() + assert nose.run(argv=argv, config=conf), 'rgw multisite test failures' + + +def get_log_stream(): + """ return a log stream for nose output """ + # XXX: this is a workaround for IOErrors when nose writes to stderr, + # copied from vstart_runner.py + class LogStream(object): + def __init__(self): + self.buffer = "" + + def write(self, data): + self.buffer += data + if "\n" in self.buffer: + lines = self.buffer.split("\n") + for line in lines[:-1]: + log.info(line) + self.buffer = lines[-1] + + def flush(self): + pass + + return LogStream() + + +task = RGWMultisiteTests diff --git a/qa/tasks/rook-ceph.conf b/qa/tasks/rook-ceph.conf new file mode 100644 index 000000000..38ac11e41 --- /dev/null +++ b/qa/tasks/rook-ceph.conf @@ -0,0 +1,41 @@ +[global] + +log to file = true + +mon clock drift allowed = 1.000 + +# replicate across OSDs, not hosts +osd crush chooseleaf type = 0 + +# enable some debugging +auth debug = true +ms die on old message = true +ms die on bug = true +debug asserts on shutdown = true + + +[osd] +# debugging +osd debug shutdown = true +osd debug op order = true +osd debug verify stray on activate = true +osd debug pg log writeout = true +osd debug verify cached snaps = true +osd debug verify missing on start = true +osd debug misdirected ops = true +osd op queue = debug_random +osd op queue cut off = debug_random +osd shutdown pgref assert = true +bdev debug aio = true +osd sloppy crc = true + + +[mon] +# rotate auth tickets quickly to exercise renewal paths +auth mon ticket ttl = 660 # 11m +auth service ticket ttl = 240 # 4m + +# don't complain about global id reclaim +mon_warn_on_insecure_global_id_reclaim = false +mon_warn_on_insecure_global_id_reclaim_allowed = false + diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py new file mode 100644 index 000000000..427f8324e --- /dev/null +++ b/qa/tasks/rook.py @@ -0,0 +1,654 @@ +""" +Rook cluster task +""" +import argparse +import configobj +import contextlib +import json +import logging +import os +import yaml +from io import BytesIO + +from tarfile import ReadError +from tasks.ceph_manager import CephManager +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config +from teuthology.contextutil import safe_while +from teuthology.orchestra import run +from teuthology import contextutil +from tasks.ceph import healthy +from tasks.cephadm import update_archive_setting + +log = logging.getLogger(__name__) + +def path_to_examples(ctx, cluster_name : str) -> str: + for p in ['rook/deploy/examples/', 'rook/cluster/examples/kubernetes/ceph/']: + try: + ctx.rook[cluster_name].remote.get_file(p + 'operator.yaml') + return p + except: + pass + assert False, 'Path to examples not found' + +def _kubectl(ctx, config, args, **kwargs): + cluster_name = config.get('cluster', 'ceph') + return ctx.rook[cluster_name].remote.run( + args=['kubectl'] + args, + **kwargs + ) + + +def shell(ctx, config): + """ + Run command(s) inside the rook tools container. 
+ + tasks: + - kubeadm: + - rook: + - rook.shell: + - ceph -s + + or + + tasks: + - kubeadm: + - rook: + - rook.shell: + commands: + - ceph -s + + """ + if isinstance(config, list): + config = {'commands': config} + for cmd in config.get('commands', []): + if isinstance(cmd, str): + _shell(ctx, config, cmd.split(' ')) + else: + _shell(ctx, config, cmd) + + +def _shell(ctx, config, args, **kwargs): + cluster_name = config.get('cluster', 'ceph') + return _kubectl( + ctx, config, + [ + '-n', 'rook-ceph', + 'exec', + ctx.rook[cluster_name].toolbox, '--' + ] + args, + **kwargs + ) + + +@contextlib.contextmanager +def rook_operator(ctx, config): + cluster_name = config['cluster'] + rook_branch = config.get('rook_branch', 'master') + rook_git_url = config.get('rook_git_url', 'https://github.com/rook/rook') + + log.info(f'Cloning {rook_git_url} branch {rook_branch}') + ctx.rook[cluster_name].remote.run( + args=[ + 'rm', '-rf', 'rook', + run.Raw('&&'), + 'git', + 'clone', + '--single-branch', + '--branch', rook_branch, + rook_git_url, + 'rook', + ] + ) + + # operator.yaml + log.info(os.path.abspath(os.getcwd())) + object_methods = [method_name for method_name in dir(ctx.rook[cluster_name].remote) + if callable(getattr(ctx.rook[cluster_name].remote, method_name))] + log.info(object_methods) + operator_yaml = ctx.rook[cluster_name].remote.read_file( + (path_to_examples(ctx, cluster_name) + 'operator.yaml') + ) + rook_image = config.get('rook_image') + if rook_image: + log.info(f'Patching operator to use image {rook_image}') + crs = list(yaml.load_all(operator_yaml, Loader=yaml.FullLoader)) + assert len(crs) == 2 + crs[1]['spec']['template']['spec']['containers'][0]['image'] = rook_image + operator_yaml = yaml.dump_all(crs) + ctx.rook[cluster_name].remote.write_file('operator.yaml', operator_yaml) + + op_job = None + try: + log.info('Deploying operator') + _kubectl(ctx, config, [ + 'create', + '-f', (path_to_examples(ctx, cluster_name) + 'crds.yaml'), + '-f', (path_to_examples(ctx, cluster_name) + 'common.yaml'), + '-f', 'operator.yaml', + ]) + + # on centos: + if teuthology.get_distro(ctx) == 'centos': + _kubectl(ctx, config, [ + '-n', 'rook-ceph', + 'set', 'env', 'deploy/rook-ceph-operator', + 'ROOK_HOSTPATH_REQUIRES_PRIVILEGED=true' + ]) + + # wait for operator + op_name = None + with safe_while(sleep=10, tries=90, action="wait for operator") as proceed: + while not op_name and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-operator'], + stdout=BytesIO(), + ) + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if status == 'Running': + op_name = name + break + + # log operator output + op_job = _kubectl( + ctx, + config, + ['-n', 'rook-ceph', 'logs', '-f', op_name], + wait=False, + logger=log.getChild('operator'), + ) + + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up rook operator') + _kubectl(ctx, config, [ + 'delete', + '-f', 'operator.yaml', + ]) + if False: + # don't bother since we'll tear down k8s anyway (and this mysteriously + # fails sometimes when deleting some of the CRDs... not sure why!) 
+ _kubectl(ctx, config, [ + 'delete', + '-f', (path_to_examples() + 'common.yaml'), + ]) + _kubectl(ctx, config, [ + 'delete', + '-f', (path_to_examples() + 'crds.yaml'), + ]) + ctx.rook[cluster_name].remote.run(args=['rm', '-rf', 'rook', 'operator.yaml']) + if op_job: + op_job.wait() + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-rf', '/var/lib/rook' + ] + ) + ) + + +@contextlib.contextmanager +def ceph_log(ctx, config): + cluster_name = config['cluster'] + + log_dir = '/var/lib/rook/rook-ceph/log' + update_archive_setting(ctx, 'log', log_dir) + + try: + yield + + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + + finally: + log.info('Checking cluster log for badness...') + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. + :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + f'{log_dir}/ceph.log', + ] + if excludes: + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + r = ctx.rook[cluster_name].remote.run( + stdout=BytesIO(), + args=args, + ) + stdout = r.stdout.getvalue().decode() + if stdout: + return stdout + return None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config.get('log-ignorelist')) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log-ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + log_dir, + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.name) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, log_dir, + os.path.join(sub, 'log')) + except ReadError: + pass + + +def build_initial_config(ctx, config): + path = os.path.join(os.path.dirname(__file__), 'rook-ceph.conf') + conf = configobj.ConfigObj(path, file_error=True) + + # overrides + for section, keys in config.get('conf',{}).items(): + for key, value in keys.items(): + log.info(" override: [%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + return conf + + +@contextlib.contextmanager +def rook_cluster(ctx, config): + cluster_name = config['cluster'] + + # count how many OSDs we'll create + num_devs = 0 + num_hosts = 0 + for remote in ctx.cluster.remotes.keys(): + ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines() + num_devs += len(ls) + num_hosts += 1 + ctx.rook[cluster_name].num_osds = num_devs + + # config + ceph_conf = build_initial_config(ctx, 
config) + ceph_conf_fp = BytesIO() + ceph_conf.write(ceph_conf_fp) + log.info(f'Config:\n{ceph_conf_fp.getvalue()}') + _kubectl(ctx, ceph_conf, ['create', '-f', '-'], stdin=yaml.dump({ + 'apiVersion': 'v1', + 'kind': 'ConfigMap', + 'metadata': { + 'name': 'rook-config-override', + 'namespace': 'rook-ceph'}, + 'data': { + 'config': ceph_conf_fp.getvalue() + } + })) + + # cluster + cluster = { + 'apiVersion': 'ceph.rook.io/v1', + 'kind': 'CephCluster', + 'metadata': {'name': 'rook-ceph', 'namespace': 'rook-ceph'}, + 'spec': { + 'cephVersion': { + 'image': ctx.rook[cluster_name].image, + 'allowUnsupported': True, + }, + 'dataDirHostPath': '/var/lib/rook', + 'skipUpgradeChecks': True, + 'mgr': { + 'count': 1, + 'modules': [ + { 'name': 'rook', 'enabled': True }, + ], + }, + 'mon': { + 'count': num_hosts, + 'allowMultiplePerNode': True, + }, + 'storage': { + 'storageClassDeviceSets': [ + { + 'name': 'scratch', + 'count': num_devs, + 'portable': False, + 'volumeClaimTemplates': [ + { + 'metadata': {'name': 'data'}, + 'spec': { + 'resources': { + 'requests': { + 'storage': '10Gi' # <= (lte) the actual PV size + } + }, + 'storageClassName': 'scratch', + 'volumeMode': 'Block', + 'accessModes': ['ReadWriteOnce'], + }, + }, + ], + } + ], + }, + } + } + teuthology.deep_merge(cluster['spec'], config.get('spec', {})) + + cluster_yaml = yaml.dump(cluster) + log.info(f'Cluster:\n{cluster_yaml}') + try: + ctx.rook[cluster_name].remote.write_file('cluster.yaml', cluster_yaml) + _kubectl(ctx, config, ['create', '-f', 'cluster.yaml']) + yield + + except Exception as e: + log.exception(e) + raise + + finally: + _kubectl(ctx, config, ['delete', '-f', 'cluster.yaml'], check_status=False) + + # wait for cluster to shut down + log.info('Waiting for cluster to stop') + running = True + with safe_while(sleep=5, tries=100, action="wait for teardown") as proceed: + while running and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods'], + stdout=BytesIO(), + ) + running = False + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if ( + name != 'NAME' + and not name.startswith('csi-') + and not name.startswith('rook-ceph-operator-') + and not name.startswith('rook-ceph-tools-') + ): + running = True + break + + _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'delete', 'configmap', 'rook-config-override'], + check_status=False, + ) + ctx.rook[cluster_name].remote.run(args=['rm', '-f', 'cluster.yaml']) + + +@contextlib.contextmanager +def rook_toolbox(ctx, config): + cluster_name = config['cluster'] + try: + _kubectl(ctx, config, [ + 'create', + '-f', (path_to_examples(ctx, cluster_name) + 'toolbox.yaml'), + ]) + + log.info('Waiting for tools container to start') + toolbox = None + with safe_while(sleep=5, tries=100, action="wait for toolbox") as proceed: + while not toolbox and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-tools'], + stdout=BytesIO(), + ) + _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods'], + stdout=BytesIO(), + ) + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if status == 'Running': + toolbox = name + break + ctx.rook[cluster_name].toolbox = toolbox + yield + + except Exception as e: + log.exception(e) + raise + + finally: + _kubectl(ctx, config, [ + 'delete', + '-f', (path_to_examples(ctx, cluster_name) + 'toolbox.yaml'), + ], check_status=False) + + 
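
The readiness and teardown checks in this file share one polling idiom: teuthology's safe_while yields a proceed() callable that sleeps between attempts and gives up after a fixed number of tries. A minimal sketch of that pattern, with condition and action as placeholder arguments:

    from teuthology.contextutil import safe_while

    def wait_for(condition, action):
        """Poll condition() every 10s, up to 90 attempts, as the checks above do."""
        with safe_while(sleep=10, tries=90, action=action) as proceed:
            while proceed():   # raises when the attempt budget is exhausted
                if condition():
                    return

In this file the condition is typically a kubectl or `ceph osd stat` probe whose stdout is captured into a BytesIO and parsed.
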
+@contextlib.contextmanager +def wait_for_osds(ctx, config): + cluster_name = config.get('cluster', 'ceph') + + want = ctx.rook[cluster_name].num_osds + log.info(f'Waiting for {want} OSDs') + with safe_while(sleep=10, tries=90, action="check osd count") as proceed: + while proceed(): + p = _shell(ctx, config, ['ceph', 'osd', 'stat', '-f', 'json'], + stdout=BytesIO(), + check_status=False) + if p.exitstatus == 0: + r = json.loads(p.stdout.getvalue().decode('utf-8')) + have = r.get('num_up_osds', 0) + if have == want: + break + log.info(f' have {have}/{want} OSDs') + + yield + +@contextlib.contextmanager +def ceph_config_keyring(ctx, config): + # get config and push to hosts + log.info('Distributing ceph config and client.admin keyring') + p = _shell(ctx, config, ['cat', '/etc/ceph/ceph.conf'], stdout=BytesIO()) + conf = p.stdout.getvalue() + p = _shell(ctx, config, ['cat', '/etc/ceph/keyring'], stdout=BytesIO()) + keyring = p.stdout.getvalue() + ctx.cluster.run(args=['sudo', 'mkdir', '-p', '/etc/ceph']) + for remote in ctx.cluster.remotes.keys(): + remote.write_file( + '/etc/ceph/ceph.conf', + conf, + sudo=True, + ) + remote.write_file( + '/etc/ceph/keyring', + keyring, + sudo=True, + ) + + try: + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up config and client.admin keyring') + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + '/etc/ceph/ceph.conf', + '/etc/ceph/ceph.client.admin.keyring' + ]) + + +@contextlib.contextmanager +def ceph_clients(ctx, config): + cluster_name = config['cluster'] + + log.info('Setting up client nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, + name) + r = _shell(ctx, config, + args=[ + 'ceph', 'auth', + 'get-or-create', name, + 'mon', 'allow *', + 'osd', 'allow *', + 'mds', 'allow *', + 'mgr', 'allow *', + ], + stdout=BytesIO(), + ) + keyring = r.stdout.getvalue() + remote.write_file(client_keyring, keyring, sudo=True, mode='0644') + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy rook-ceph cluster + + tasks: + - kubeadm: + - rook: + branch: wip-foo + spec: + mon: + count: 1 + + The spec item is deep-merged against the cluster.yaml. The branch, sha1, or + image items are used to determine the Ceph container image. 
+ """ + if not config: + config = {} + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + log.info('Rook start') + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + teuthology.deep_merge(config, overrides.get('rook', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'rook'): + ctx.rook = {} + if 'cluster' not in config: + config['cluster'] = 'ceph' + cluster_name = config['cluster'] + if cluster_name not in ctx.rook: + ctx.rook[cluster_name] = argparse.Namespace() + + ctx.rook[cluster_name].remote = list(ctx.cluster.remotes.keys())[0] + + # image + teuth_defaults = teuth_config.get('defaults', {}) + cephadm_defaults = teuth_defaults.get('cephadm', {}) + containers_defaults = cephadm_defaults.get('containers', {}) + container_image_name = containers_defaults.get('image', None) + if 'image' in config: + ctx.rook[cluster_name].image = config.get('image') + else: + sha1 = config.get('sha1') + flavor = config.get('flavor', 'default') + if sha1: + if flavor == "crimson": + ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor + else: + ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + else: + # hmm, fall back to branch? + branch = config.get('branch', 'master') + ctx.rook[cluster_name].image = container_image_name + ':' + branch + log.info('Ceph image is %s' % ctx.rook[cluster_name].image) + + with contextutil.nested( + lambda: rook_operator(ctx, config), + lambda: ceph_log(ctx, config), + lambda: rook_cluster(ctx, config), + lambda: rook_toolbox(ctx, config), + lambda: wait_for_osds(ctx, config), + lambda: ceph_config_keyring(ctx, config), + lambda: ceph_clients(ctx, config), + ): + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[cluster_name] = CephManager( + ctx.rook[cluster_name].remote, + ctx=ctx, + logger=log.getChild('ceph_manager.' 
+ cluster_name), + cluster=cluster_name, + rook=True, + ) + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=config) + log.info('Rook complete, yielding') + yield + + finally: + log.info('Tearing down rook') diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py new file mode 100644 index 000000000..7b77359fc --- /dev/null +++ b/qa/tasks/s3a_hadoop.py @@ -0,0 +1,285 @@ +import contextlib +import logging +from teuthology import misc +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run Hadoop S3A tests using Ceph + usage: + -tasks: + ceph-ansible: + s3a-hadoop: + maven-version: '3.6.3' (default) + hadoop-version: '2.9.2' + bucket-name: 's3atest' (default) + access-key: 'anykey' (uses a default value) + secret-key: 'secretkey' ( uses a default value) + role: client.0 + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task' + + overrides = ctx.config.get('overrides', {}) + misc.deep_merge(config, overrides.get('s3a-hadoop', {})) + testdir = misc.get_testdir(ctx) + + role = config.get('role') + (remote,) = ctx.cluster.only(role).remotes.keys() + endpoint = ctx.rgw.role_endpoints.get(role) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(role) + + # get versions + maven_major = config.get('maven-major', 'maven-3') + maven_version = config.get('maven-version', '3.6.3') + hadoop_ver = config.get('hadoop-version', '2.9.2') + bucket_name = config.get('bucket-name', 's3atest') + access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F') + secret_key = config.get( + 'secret-key', + 'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb') + + # set versions for cloning the repo + apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format( + maven_version=maven_version) + maven_link = 'http://archive.apache.org/dist/maven/' + \ + '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + apache_maven + hadoop_git = 'https://github.com/apache/hadoop' + hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver) + if hadoop_ver == 'trunk': + # just checkout a new branch out of trunk + hadoop_rel = 'hadoop-ceph-trunk' + install_prereq(remote) + remote.run( + args=[ + 'cd', + testdir, + run.Raw('&&'), + 'wget', + maven_link, + run.Raw('&&'), + 'tar', + '-xvf', + apache_maven, + run.Raw('&&'), + 'git', + 'clone', + run.Raw(hadoop_git), + run.Raw('&&'), + 'cd', + 'hadoop', + run.Raw('&&'), + 'git', + 'checkout', + '-b', + run.Raw(hadoop_rel) + ] + ) + configure_s3a(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir) + setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir) + if hadoop_ver.startswith('2.8'): + # test all ITtests but skip AWS test using public bucket landsat-pds + # which is not available from within this test + test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale \ + -Dfs.s3a.scale.test.timeout=1200 \ + -Dfs.s3a.scale.test.huge.filesize=256M verify' + else: + test_options = 'test -Dtest=S3a*,TestS3A*' + try: + run_s3atest(remote, maven_version, testdir, test_options) + yield + finally: + log.info("Done s3a testing, Cleaning up") + for fil in ['apache*', 'hadoop*', 'venv*', 'create*']: + remote.run(args=['rm', run.Raw('-rf'), run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))]) + + +def 
install_prereq(client): + """ + Install pre requisites for RHEL and CentOS + TBD: Ubuntu + """ + if client.os.name == 'rhel' or client.os.name == 'centos': + client.run( + args=[ + 'sudo', + 'yum', + 'install', + '-y', + 'protobuf-c.x86_64', + 'java', + 'java-1.8.0-openjdk-devel', + 'dnsmasq' + ] + ) + + +def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir): + """ + Create user with access_key and secret_key that will be + used for the s3a testdir + """ + client.run( + args=[ + 'sudo', + 'radosgw-admin', + 'user', + 'create', + run.Raw('--uid'), + 's3a', + run.Raw('--display-name="s3a cephtests"'), + run.Raw('--access-key={access_key}'.format(access_key=access_key)), + run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)), + run.Raw('--email=s3a@ceph.com'), + ] + ) + client.run( + args=[ + 'python3', + '-m', + 'venv', + '{testdir}/venv'.format(testdir=testdir), + run.Raw('&&'), + run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)), + 'install', + 'boto' + ] + ) + create_bucket = """ +#!/usr/bin/env python +import boto +import boto.s3.connection +access_key = '{access_key}' +secret_key = '{secret_key}' + +conn = boto.connect_s3( + aws_access_key_id = access_key, + aws_secret_access_key = secret_key, + host = '{dns_name}', + is_secure=False, + calling_format = boto.s3.connection.OrdinaryCallingFormat(), + ) +bucket = conn.create_bucket('{bucket_name}') +for bucket in conn.get_all_buckets(): + print(bucket.name + "\t" + bucket.creation_date) +""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name) + py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir) + client.sudo_write_file(py_bucket_file, create_bucket, mode='0744') + client.run( + args=[ + 'cat', + '{testdir}/create_bucket.py'.format(testdir=testdir), + ] + ) + client.run( + args=[ + '{testdir}/venv/bin/python'.format(testdir=testdir), + '{testdir}/create_bucket.py'.format(testdir=testdir), + ] + ) + + +def run_s3atest(client, maven_version, testdir, test_options): + """ + Finally run the s3a test + """ + aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir) + run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version) + # Remove AWS CredentialsProvider tests as it hits public bucket from AWS + # better solution is to create the public bucket on local server and test + rm_test = 'rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java' + client.run( + args=[ + 'cd', + run.Raw(aws_testdir), + run.Raw('&&'), + run.Raw(rm_test), + run.Raw('&&'), + run.Raw(run_test), + run.Raw(test_options) + ] + ) + + +def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir): + """ + Use the template to configure s3a test, Fill in access_key, secret_key + and other details required for test. 
+ """ + config_template = """<configuration> +<property> +<name>fs.s3a.endpoint</name> +<value>{name}</value> +</property> + +<property> +<name>fs.contract.test.fs.s3a</name> +<value>s3a://{bucket_name}/</value> +</property> + +<property> +<name>fs.s3a.connection.ssl.enabled</name> +<value>false</value> +</property> + +<property> +<name>test.fs.s3n.name</name> +<value>s3n://{bucket_name}/</value> +</property> + +<property> +<name>test.fs.s3a.name</name> +<value>s3a://{bucket_name}/</value> +</property> + +<property> +<name>test.fs.s3.name</name> +<value>s3://{bucket_name}/</value> +</property> + +<property> +<name>fs.s3.awsAccessKeyId</name> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3.awsSecretAccessKey</name> +<value>{secret_key}</value> +</property> + +<property> +<name>fs.s3n.awsAccessKeyId</name> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3n.awsSecretAccessKey</name> +<value>{secret_key}</value> +</property> + +<property> +<name>fs.s3a.access.key</name> +<description>AWS access key ID. Omit for Role-based authentication.</description> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3a.secret.key</name> +<description>AWS secret key. Omit for Role-based authentication.</description> +<value>{secret_key}</value> +</property> +</configuration> +""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key) + config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml' + client.write_file(config_path, config_template) + # output for debug + client.run(args=['cat', config_path]) diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py new file mode 100644 index 000000000..3856f8fad --- /dev/null +++ b/qa/tasks/s3tests.py @@ -0,0 +1,648 @@ +""" +Run a set of s3 tests on rgw. +""" +from io import BytesIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading s3-tests...') + testdir = teuthology.get_testdir(ctx) + for (client, client_config) in config.items(): + s3tests_branch = client_config.get('force-branch', None) + if not s3tests_branch: + raise ValueError( + "Could not determine what branch to use for s3-tests. 
Please add 'force-branch: {s3-tests branch name}' to the .yaml config for this s3tests task.") + + log.info("Using branch '%s' for s3tests", s3tests_branch) + sha1 = client_config.get('sha1') + git_remote = client_config.get('git_remote', teuth_config.ceph_git_base_url) + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', s3tests_branch, + git_remote + 's3-tests.git', + '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), + ], + ) + + +def _config_user(s3tests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + s3tests_conf[section].setdefault('user_id', user) + s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + s3tests_conf[section].setdefault('access_key', + ''.join(random.choice(string.ascii_uppercase) for i in range(20))) + s3tests_conf[section].setdefault('secret_key', + base64.b64encode(os.urandom(40)).decode()) + s3tests_conf[section].setdefault('totp_serial', + ''.join(random.choice(string.digits) for i in range(10))) + s3tests_conf[section].setdefault('totp_seed', + base64.b32encode(os.urandom(40)).decode()) + s3tests_conf[section].setdefault('totp_seconds', '5') + + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a main and an alternate s3 user. + """ + assert isinstance(config, dict) + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + + users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser', 'iam': 'foobar'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('fixtures', {}) + s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') + for section, user in users.items(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
+ client_id + # create user + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--email', s3tests_conf[section]['email'], + '--caps', 'user-policy=*', + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--cluster', cluster_name, + ], + ) + + if not ctx.dbstore_variable: + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'mfa', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--totp-serial', s3tests_conf[section]['totp_serial'], + '--totp-seed', s3tests_conf[section]['totp_seed'], + '--totp-seconds', s3tests_conf[section]['totp_seconds'], + '--totp-window', '8', + '--totp-seed-type', 'base32', + '--cluster', cluster_name, + ], + ) + + # add/configure caps for iam user + if section=='iam': + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'roles=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'oidc-provider=*', + '--cluster', cluster_name, + ], + ) + + if "TOKEN" in os.environ: + s3tests_conf.setdefault('webidentity', {}) + s3tests_conf['webidentity'].setdefault('token',os.environ['TOKEN']) + s3tests_conf['webidentity'].setdefault('aud',os.environ['AUD']) + s3tests_conf['webidentity'].setdefault('sub',os.environ['SUB']) + s3tests_conf['webidentity'].setdefault('azp',os.environ['AZP']) + s3tests_conf['webidentity'].setdefault('user_token',os.environ['USER_TOKEN']) + s3tests_conf['webidentity'].setdefault('thumbprint',os.environ['THUMBPRINT']) + s3tests_conf['webidentity'].setdefault('KC_REALM',os.environ['KC_REALM']) + + try: + yield + finally: + for client in config['clients']: + for user in users.values(): + uid = '{user}.{client}'.format(user=user, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'rm', + '--uid', uid, + '--purge-data', + '--cluster', cluster_name, + ], + ) + + +@contextlib.contextmanager +def configure(ctx, config): + """ + Create the config files for s3tests an boto. 
+ """ + assert isinstance(config, dict) + log.info('Configuring s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + properties = properties or {} + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf['DEFAULT']['calling_format'] = properties.get('calling-format', 'ordinary') + + # use rgw_server if given, or default to local client + role = properties.get('rgw_server', client) + + endpoint = ctx.rgw.role_endpoints.get(role) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(role) + + s3tests_conf['DEFAULT']['host'] = endpoint.dns_name + + website_role = properties.get('rgw_website_server') + if website_role: + website_endpoint = ctx.rgw.role_endpoints.get(website_role) + assert website_endpoint, \ + 's3tests: no rgw endpoint for rgw_website_server {}'.format(website_role) + assert website_endpoint.website_dns_name, \ + 's3tests: no dns-s3website-name for rgw_website_server {}'.format(website_role) + s3tests_conf['DEFAULT']['s3website_domain'] = website_endpoint.website_dns_name + + if hasattr(ctx, 'barbican'): + properties = properties['barbican'] + if properties is not None and 'kms_key' in properties: + if not (properties['kms_key'] in ctx.barbican.keys): + raise ConfigError('Key '+properties['kms_key']+' not defined') + + if not (properties['kms_key2'] in ctx.barbican.keys): + raise ConfigError('Key '+properties['kms_key2']+' not defined') + + key = ctx.barbican.keys[properties['kms_key']] + s3tests_conf['DEFAULT']['kms_keyid'] = key['id'] + + key = ctx.barbican.keys[properties['kms_key2']] + s3tests_conf['DEFAULT']['kms_keyid2'] = key['id'] + + elif hasattr(ctx, 'vault'): + engine_or_flavor = vars(ctx.vault).get('flavor',ctx.vault.engine) + keys=[] + for name in (x['Path'] for x in vars(ctx.vault).get('keys', {}).get(ctx.rgw.vault_role)): + keys.append(name) + + keys.extend(['testkey-1','testkey-2']) + if engine_or_flavor == "old": + keys=[keys[i] + "/1" for i in range(len(keys))] + + properties = properties.get('vault_%s' % engine_or_flavor, {}) + s3tests_conf['DEFAULT']['kms_keyid'] = properties.get('key_path', keys[0]) + s3tests_conf['DEFAULT']['kms_keyid2'] = properties.get('key_path2', keys[1]) + elif hasattr(ctx.rgw, 'pykmip_role'): + keys=[] + for name in (x['Name'] for x in ctx.pykmip.keys[ctx.rgw.pykmip_role]): + p=name.partition('-') + keys.append(p[2] if p[2] else p[0]) + keys.extend(['testkey-1', 'testkey-2']) + s3tests_conf['DEFAULT']['kms_keyid'] = properties.get('kms_key', keys[0]) + s3tests_conf['DEFAULT']['kms_keyid2'] = properties.get('kms_key2', keys[1]) + else: + # Fallback scenario where it's the local (ceph.conf) kms being tested + s3tests_conf['DEFAULT']['kms_keyid'] = 'testkey-1' + s3tests_conf['DEFAULT']['kms_keyid2'] = 'testkey-2' + + slow_backend = properties.get('slow_backend') + if slow_backend: + s3tests_conf['fixtures']['slow backend'] = slow_backend + + storage_classes = properties.get('storage classes') + if storage_classes: + s3tests_conf['s3 main']['storage_classes'] = storage_classes + + lc_debug_interval = properties.get('lc_debug_interval') + if lc_debug_interval: + s3tests_conf['s3 main']['lc_debug_interval'] = lc_debug_interval + + if ctx.rgw_cloudtier is not None: + log.info(' ctx.rgw_cloudtier config is %s ...', ctx.rgw_cloudtier.config) + client_rgw_config = ctx.rgw_cloudtier.config.get(client) + if client_rgw_config: + log.info(' ctx.rgw_cloudtier config is %s ...', client_rgw_config) + cloudtier_user = client_rgw_config.get('cloudtier_user') + cloud_client = 
client_rgw_config.get('cloud_client') + endpoint = ctx.rgw.role_endpoints.get(cloud_client) + s3tests_conf['s3 cloud']['host'] = endpoint.dns_name + s3tests_conf['s3 cloud']['port'] = endpoint.port + s3tests_conf['s3 cloud']['access_key'] = cloudtier_user.get('cloud_access_key') + s3tests_conf['s3 cloud']['secret_key'] = cloudtier_user.get('cloud_secret') + s3tests_conf['s3 cloud']['cloud_storage_class'] = client_rgw_config.get('cloud_storage_class') + s3tests_conf['s3 cloud']['storage_class'] = client_rgw_config.get('cloud_regular_storage_class') + s3tests_conf['s3 cloud']['retain_head_object'] = client_rgw_config.get('cloud_retain_head_object') + cloud_target_path = client_rgw_config.get('cloud_target_path') + cloud_target_storage_class = client_rgw_config.get('cloud_target_storage_class') + if (cloud_target_path != None): + s3tests_conf['s3 cloud']['target_path'] = cloud_target_path + if (cloud_target_storage_class != None): + s3tests_conf['s3 cloud']['target_storage_class'] = cloud_target_storage_class + + (remote,) = ctx.cluster.only(client).remotes.keys() + conf_fp = BytesIO() + s3tests_conf.write(conf_fp) + remote.write_file( + path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + + log.info('Configuring boto...') + boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template') + for client, properties in config['clients'].items(): + with open(boto_src) as f: + (remote,) = ctx.cluster.only(client).remotes.keys() + conf = f.read().format( + idle_timeout=config.get('idle_timeout', 30) + ) + remote.write_file('{tdir}/boto-{client}.cfg'.format(tdir=testdir, client=client), conf) + + try: + yield + + finally: + log.info('Cleaning up boto...') + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', + '{tdir}/boto-{client}.cfg'.format(tdir=testdir, client=client), + ], + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run the s3tests after everything is set up. 
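    On each client this amounts to roughly the following invocation inside the
    tox virtualenv (a sketch; the marker expression and attrs are assembled from
    the client config as shown in the code below):

        cd {tdir}/s3-tests-{client} && \
          S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf \
          BOTO_CONFIG={tdir}/boto-{client}.cfg \
          tox -- -v -m 'not fails_on_rgw and not lifecycle_expiration and ...'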
+ + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + client_config = client_config or {} + (remote,) = ctx.cluster.only(client).remotes.keys() + args = [ + 'cd', '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), run.Raw('&&'), + 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), + 'BOTO_CONFIG={tdir}/boto-{client}.cfg'.format(tdir=testdir, client=client) + ] + # the 'requests' library comes with its own ca bundle to verify ssl + # certificates - override that to use the system's ca bundle, which + # is where the ssl task installed this certificate + if remote.os.package_type == 'deb': + args += ['REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt'] + else: + args += ['REQUESTS_CA_BUNDLE=/etc/pki/tls/certs/ca-bundle.crt'] + # civetweb > 1.8 && beast parsers are strict on rfc2616 + attrs = ["not fails_on_rgw", "not lifecycle_expiration", "not test_of_sts", "not webidentity_test"] + if client_config.get('calling-format') != 'ordinary': + attrs += ['not fails_with_subdomain'] + if not client_config.get('with-sse-s3'): + attrs += ['not sse_s3'] + + if 'extra_attrs' in client_config: + attrs = client_config.get('extra_attrs') + args += ['tox', '--', '-v', '-m', ' and '.join(attrs)] + if 'extra_args' in client_config: + args.append(client_config['extra_args']) + + toxvenv_sh(ctx, remote, args, label="s3 tests against rgw") + yield + +@contextlib.contextmanager +def scan_for_leaked_encryption_keys(ctx, config): + """ + Scan radosgw logs for the encryption keys used by s3tests to + verify that we're not leaking secrets. + + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + + try: + yield + finally: + # x-amz-server-side-encryption-customer-key + s3test_customer_key = 'pO3upElrwuEXSoFwCfnZPdSsmt/xWeFa0N9KgDijwVs=' + + log.debug('Scanning radosgw logs for leaked encryption keys...') + procs = list() + for client, client_config in config.items(): + if not client_config.get('scan_for_encryption_keys', True): + continue + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_cluster = '.'.join((cluster_name, daemon_type, client_id)) + (remote,) = ctx.cluster.only(client).remotes.keys() + proc = remote.run( + args=[ + 'grep', + '--binary-files=text', + s3test_customer_key, + '/var/log/ceph/rgw.{client}.log'.format(client=client_with_cluster), + ], + wait=False, + check_status=False, + ) + procs.append(proc) + + for proc in procs: + proc.wait() + if proc.returncode == 1: # 1 means no matches + continue + log.error('radosgw log is leaking encryption keys!') + raise Exception('radosgw log is leaking encryption keys') + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the s3-tests suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - s3tests: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: [client.0] + + To run against a server on client.1 and increase the boto timeout to 10m:: + + tasks: + - ceph: + - rgw: [client.1] + - s3tests: + client.0: + rgw_server: client.1 + idle_timeout: 600 + + To pass extra arguments to pytest (e.g. 
to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + + To run any sts-tests don't forget to set a config variable named 'sts_tests' to 'True' as follows:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + sts_tests: True + rgw_server: client.0 + + To run any cloud-transition tests don't forget to set a config variable named 'cloudtier_tests' to 'True' as follows:: + + tasks: + - ceph: + - rgw: [client.0 client.1] + - s3tests: + client.0: + cloudtier_tests: True + rgw_server: client.0 + + """ + assert hasattr(ctx, 'rgw'), 's3tests must run after the rgw task' + assert hasattr(ctx, 'tox'), 's3tests must run after the tox task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('s3tests', {})) + + log.debug('s3tests config is %s', config) + + s3tests_conf = {} + + for client, client_config in config.items(): + if 'sts_tests' in client_config: + ctx.sts_variable = True + else: + ctx.sts_variable = False + + if 'cloudtier_tests' in client_config: + ctx.cloudtier_variable = True + else: + ctx.cloudtier_variable = False + + if 'dbstore_tests' in client_config: + ctx.dbstore_variable = True + else: + ctx.dbstore_variable = False + + #This will be the structure of config file when you want to run webidentity_test (sts-test) + if ctx.sts_variable and "TOKEN" in os.environ: + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 's3 tenant' : {}, + 'iam' : {}, + 'webidentity': {}, + } + ) + + elif ctx.sts_variable: + #This will be the structure of config file when you want to run assume_role_test and get_session_token_test (sts-test) or iam-tests + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 'iam' : {}, + 's3 tenant' : {}, + } + ) + + elif ctx.cloudtier_variable: + #This will be the structure of config file when you want to run normal s3-tests + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 
alt' : {}, + 's3 tenant' : {}, + 's3 cloud' : {}, + 'iam' : {}, + } + ) + else: + #This will be the structure of config file when you want to run normal s3-tests + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 's3 tenant' : {}, + 'iam' : {}, + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + lambda: scan_for_leaked_encryption_keys(ctx=ctx, config=config), + ): + pass + yield diff --git a/qa/tasks/s3tests_java.py b/qa/tasks/s3tests_java.py new file mode 100644 index 000000000..dbe03921c --- /dev/null +++ b/qa/tasks/s3tests_java.py @@ -0,0 +1,402 @@ +""" +Task for running RGW S3 tests with the AWS Java SDK +""" +from io import BytesIO +import logging + +import base64 +import os +import random +import string +import yaml +import getpass + +from teuthology import misc as teuthology +from teuthology.task import Task +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +""" + Task for running RGW S3 tests with the AWS Java SDK + + Tests run only on clients specified in the s3tests-java config section. + If no client is given a default 'client.0' is chosen. + If such does not match the rgw client the task will fail. + + tasks: + - ceph: + - rgw: [client.0] + - s3tests-java: + client.0: + + Extra arguments can be passed by adding options to the corresponding client + section under the s3tests-java task (e.g. 
to run a certain test, + specify a different repository and branch for the test suite, + run in info/debug mode (for the java suite) or forward the gradle output to a log file): + + tasks: + - ceph: + - rgw: [client.0] + - s3tests-java: + client.0: + force-branch: wip + force-repo: 'https://github.com/adamyanova/java_s3tests.git' + log-fwd: '../s3tests-java.log' + log-level: info + extra-args: ['--tests', 'ObjectTest.testEncryptionKeySSECInvalidMd5'] + + To run a specific test, provide its name to the extra-args section e.g.: + - s3tests-java: + client.0: + extra-args: ['--tests', 'ObjectTest.testEncryptionKeySSECInvalidMd5'] + +""" + + +class S3tests_java(Task): + """ + Download and install S3 tests in Java + This will require openjdk and gradle + """ + + def __init__(self, ctx, config): + super(S3tests_java, self).__init__(ctx, config) + self.log = log + log.debug('S3 Tests Java: __INIT__ ') + assert hasattr(ctx, 'rgw'), 'S3tests_java must run after the rgw task' + clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(self.ctx.cluster, 'client')] + self.all_clients = [] + for client in clients: + if client in self.config: + self.all_clients.extend([client]) + if self.all_clients is None: + self.all_clients = 'client.0' + self.users = {'s3main': 'tester', + 's3alt': 'johndoe', 'tenanted': 'testx$tenanteduser'} + + def setup(self): + super(S3tests_java, self).setup() + log.debug('S3 Tests Java: SETUP') + for client in self.all_clients: + self.download_test_suite(client) + self.install_required_packages(client) + + def begin(self): + super(S3tests_java, self).begin() + log.debug('S3 Tests Java: BEGIN') + for (host, roles) in self.ctx.cluster.remotes.items(): + log.debug( + 'S3 Tests Java: Cluster config is: {cfg}'.format(cfg=roles)) + log.debug('S3 Tests Java: Host is: {host}'.format(host=host)) + self.create_users() + self.run_tests() + + def end(self): + super(S3tests_java, self).end() + log.debug('S3 Tests Java: END') + for client in self.all_clients: + self.remove_tests(client) + self.delete_users(client) + + def download_test_suite(self, client): + log.info("S3 Tests Java: Downloading test suite...") + testdir = teuthology.get_testdir(self.ctx) + branch = 'master' + repo = 'https://github.com/ceph/java_s3tests.git' + if client in self.config and self.config[client] is not None: + if 'force-branch' in self.config[client] and self.config[client]['force-branch'] is not None: + branch = self.config[client]['force-branch'] + if 'force-repo' in self.config[client] and self.config[client]['force-repo'] is not None: + repo = self.config[client]['force-repo'] + self.ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', branch, + repo, + '{tdir}/s3-tests-java'.format(tdir=testdir), + ], + stdout=BytesIO() + ) + if client in self.config and self.config[client] is not None: + if 'sha1' in self.config[client] and self.config[client]['sha1'] is not None: + self.ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/s3-tests-java'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', self.config[client]['sha1'], + ], + ) + + if 'log-level' in self.config[client]: + if self.config[client]['log-level'] == 'info': + self.ctx.cluster.only(client).run( + args=[ + 'sed', '-i', '\'s/log4j.rootLogger=WARN/log4j.rootLogger=INFO/g\'', + '{tdir}/s3-tests-java/src/main/resources/log4j.properties'.format( + tdir=testdir) + ] + ) + if self.config[client]['log-level'] == 'debug': + self.ctx.cluster.only(client).run( + args=[ + 'sed', '-i', 
'\'s/log4j.rootLogger=WARN/log4j.rootLogger=DEBUG/g\'', + '{tdir}/s3-tests-java/src/main/resources/log4j.properties'.format( + tdir=testdir) + ] + ) + + def install_required_packages(self, client): + """ + Run bootstrap script to install openjdk and gradle. + Add certificates to java keystore + """ + log.info("S3 Tests Java: Installing required packages...") + testdir = teuthology.get_testdir(self.ctx) + self.ctx.cluster.only(client).run( + args=['{tdir}/s3-tests-java/bootstrap.sh'.format(tdir=testdir)], + stdout=BytesIO() + ) + + endpoint = self.ctx.rgw.role_endpoints[client] + if endpoint.cert: + path = 'lib/security/cacerts' + self.ctx.cluster.only(client).run( + args=['sudo', + 'keytool', + '-import', '-alias', '{alias}'.format( + alias=endpoint.hostname), + '-keystore', + run.Raw( + '$(readlink -e $(dirname $(readlink -e $(which keytool)))/../{path})'.format(path=path)), + '-file', endpoint.cert.certificate, + '-storepass', 'changeit', + ], + stdout=BytesIO() + ) + + def create_users(self): + """ + Create a main and an alternative s3 user. + Configuration is read from a skelethon config file + s3tests.teuth.config.yaml in the java-s3tests repository + and missing information is added from the task. + Existing values are NOT overriden unless they are empty! + """ + log.info("S3 Tests Java: Creating S3 users...") + testdir = teuthology.get_testdir(self.ctx) + for client in self.all_clients: + endpoint = self.ctx.rgw.role_endpoints.get(client) + local_user = getpass.getuser() + remote_user = teuthology.get_test_user() + os.system("scp {remote}@{host}:{tdir}/s3-tests-java/s3tests.teuth.config.yaml /home/{local}/".format( + host=endpoint.hostname, tdir=testdir, remote=remote_user, local=local_user)) + s3tests_conf = teuthology.config_file( + '/home/{local}/s3tests.teuth.config.yaml'.format(local=local_user)) + log.debug("S3 Tests Java: s3tests_conf is {s3cfg}".format( + s3cfg=s3tests_conf)) + for section, user in list(self.users.items()): + if section in s3tests_conf: + s3_user_id = '{user}.{client}'.format( + user=user, client=client) + log.debug( + 'S3 Tests Java: Creating user {s3_user_id}'.format(s3_user_id=s3_user_id)) + self._config_user(s3tests_conf=s3tests_conf, + section=section, user=s3_user_id, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role( + client) + client_with_id = daemon_type + '.' + client_id + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['access_secret'], + '--email', s3tests_conf[section]['email'], + '--cluster', cluster_name, + ] + log.info('{args}'.format(args=args)) + self.ctx.cluster.only(client).run( + args=args, + stdout=BytesIO() + ) + else: + self.users.pop(section) + self._write_cfg_file(s3tests_conf, client) + os.system( + "rm -rf /home/{local}/s3tests.teuth.config.yaml".format(local=local_user)) + + def _config_user(self, s3tests_conf, section, user, client): + """ + Generate missing users data for this section by stashing away keys, ids, and + email addresses. 
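        Roughly the following keys are filled in per section when they are
        missing or empty (placeholder values shown; endpoint data is taken from
        the client's rgw role endpoint):

            user_id        <user>.<client>
            email          <user>_test@test.test
            access_key     20 random uppercase letters
            access_secret  base64 of 40 random bytes
            endpoint       <host>:<port>, plus host/port/region/is_secure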
+ """ + access_key = ''.join(random.choice(string.ascii_uppercase) + for i in range(20)) + access_secret = base64.b64encode(os.urandom(40)).decode('ascii') + endpoint = self.ctx.rgw.role_endpoints.get(client) + + self._set_cfg_entry( + s3tests_conf[section], 'user_id', '{user}'.format(user=user)) + self._set_cfg_entry( + s3tests_conf[section], 'email', '{user}_test@test.test'.format(user=user)) + self._set_cfg_entry( + s3tests_conf[section], 'display_name', 'Ms. {user}'.format(user=user)) + self._set_cfg_entry( + s3tests_conf[section], 'access_key', '{ak}'.format(ak=access_key)) + self._set_cfg_entry( + s3tests_conf[section], 'access_secret', '{asc}'.format(asc=access_secret)) + self._set_cfg_entry( + s3tests_conf[section], 'region', 'us-east-1') + self._set_cfg_entry( + s3tests_conf[section], 'endpoint', '{ip}:{port}'.format( + ip=endpoint.hostname, port=endpoint.port)) + self._set_cfg_entry( + s3tests_conf[section], 'host', endpoint.hostname) + self._set_cfg_entry( + s3tests_conf[section], 'port', endpoint.port) + self._set_cfg_entry( + s3tests_conf[section], 'is_secure', True if endpoint.cert else False) + + log.debug("S3 Tests Java: s3tests_conf[{sect}] is {s3cfg}".format( + sect=section, s3cfg=s3tests_conf[section])) + log.debug('S3 Tests Java: Setion, User = {sect}, {user}'.format( + sect=section, user=user)) + + def _write_cfg_file(self, cfg_dict, client): + """ + Write s3 tests java config file on the remote node. + """ + testdir = teuthology.get_testdir(self.ctx) + (remote,) = self.ctx.cluster.only(client).remotes.keys() + data = yaml.safe_dump(cfg_dict, default_flow_style=False) + path = testdir + '/archive/s3-tests-java.' + client + '.conf' + remote.write_file(path, data) + + def _set_cfg_entry(self, cfg_dict, key, value): + if not (key in cfg_dict): + cfg_dict.setdefault(key, value) + elif cfg_dict[key] is None: + cfg_dict[key] = value + + def run_tests(self): + log.info("S3 Tests Java: Running tests...") + testdir = teuthology.get_testdir(self.ctx) + for client in self.all_clients: + self.ctx.cluster.only(client).run( + args=['cp', + '{tdir}/archive/s3-tests-java.{client}.conf'.format( + tdir=testdir, client=client), + '{tdir}/s3-tests-java/config.properties'.format( + tdir=testdir) + ], + stdout=BytesIO() + ) + args = ['cd', + '{tdir}/s3-tests-java'.format(tdir=testdir), + run.Raw('&&'), + '/opt/gradle/gradle/bin/gradle', 'clean', 'test', + '--rerun-tasks', '--no-build-cache', + ] + extra_args = [] + suppress_groups = False + self.log_fwd = False + self.log_name = '' + if client in self.config and self.config[client] is not None: + if 'extra-args' in self.config[client]: + extra_args.extend(self.config[client]['extra-args']) + suppress_groups = True + if 'log-level' in self.config[client] and self.config[client]['log-level'] == 'debug': + extra_args += ['--debug'] + if 'log-fwd' in self.config[client]: + self.log_fwd = True + self.log_name = '{tdir}/s3tests_log.txt'.format( + tdir=testdir) + if self.config[client]['log-fwd'] is not None: + self.log_name = self.config[client]['log-fwd'] + extra_args += [run.Raw('>>'), + self.log_name] + + if not suppress_groups: + test_groups = ['AWS4Test', 'BucketTest', 'ObjectTest'] + else: + test_groups = ['All'] + + for gr in test_groups: + for i in range(2): + self.ctx.cluster.only(client).run( + args=['radosgw-admin', 'gc', + 'process', '--include-all'], + stdout=BytesIO() + ) + + if gr != 'All': + self.ctx.cluster.only(client).run( + args=args + ['--tests'] + [gr] + extra_args, + stdout=BytesIO() + ) + else: + 
self.ctx.cluster.only(client).run( + args=args + extra_args, + stdout=BytesIO() + ) + + for i in range(2): + self.ctx.cluster.only(client).run( + args=['radosgw-admin', 'gc', + 'process', '--include-all'], + stdout=BytesIO() + ) + + def remove_tests(self, client): + log.info('S3 Tests Java: Cleaning up s3-tests-java...') + testdir = teuthology.get_testdir(self.ctx) + + if self.log_fwd: + self.ctx.cluster.only(client).run( + args=['cd', + '{tdir}/s3-tests-java'.format(tdir=testdir), + run.Raw('&&'), + 'cat', self.log_name, + run.Raw('&&'), + 'rm', self.log_name], + stdout=BytesIO() + ) + + self.ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests-java'.format(tdir=testdir), + ], + stdout=BytesIO() + ) + + def delete_users(self, client): + log.info("S3 Tests Java: Deleting S3 users...") + testdir = teuthology.get_testdir(self.ctx) + for section, user in self.users.items(): + s3_user_id = '{user}.{client}'.format(user=user, client=client) + self.ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'rm', + '--uid', s3_user_id, + '--purge-data', + '--cluster', 'ceph', + ], + stdout=BytesIO() + ) + + +task = S3tests_java diff --git a/qa/tasks/samba.py b/qa/tasks/samba.py new file mode 100644 index 000000000..bcc247697 --- /dev/null +++ b/qa/tasks/samba.py @@ -0,0 +1,244 @@ +""" +Samba +""" +import contextlib +import logging +import time + +from teuthology import misc as teuthology +from teuthology.orchestra import run +from teuthology.orchestra.daemon import DaemonGroup + +log = logging.getLogger(__name__) + + +def get_sambas(ctx, roles): + """ + Scan for roles that are samba. Yield the id of the the samba role + (samba.0, samba.1...) and the associated remote site + + :param ctx: Context + :param roles: roles for this test (extracted from yaml files) + """ + for role in roles: + assert isinstance(role, str) + PREFIX = 'samba.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + yield (id_, remote) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Setup samba smbd with ceph vfs module. This task assumes the samba + package has already been installed via the install task. + + The config is optional and defaults to starting samba on all nodes. + If a config is given, it is expected to be a list of + samba nodes to start smbd servers on. + + Example that starts smbd on all samba nodes:: + + tasks: + - install: + - install: + project: samba + extra_packages: ['samba'] + - ceph: + - samba: + - interactive: + + Example that starts smbd on just one of the samba nodes and cifs on the other:: + + tasks: + - samba: [samba.0] + - cifs: [samba.1] + + An optional backend can be specified, and requires a path which smbd will + use as the backend storage location: + + roles: + - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a] + - [client.0, samba.0] + + tasks: + - ceph: + - ceph-fuse: [client.0] + - samba: + samba.0: + cephfuse: "{testdir}/mnt.0" + + This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with + a UNC of //localhost/cephfuse. Access through that UNC will be on + the ceph fuse mount point. + + If no arguments are specified in the samba + role, the default behavior is to enable the ceph UNC //localhost/ceph + and use the ceph vfs module as the smbd backend. 
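    In that default case the share written to /usr/local/samba/etc/smb.conf
    looks roughly like this (sketch only; when a backend is configured in the
    role, the vfs lines are dropped and the given path is used instead):

        [ceph]
            path = /
            vfs objects = ceph
            ceph:config_file = /etc/ceph/ceph.conf
            writeable = yes
            valid users = ubuntu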
+ + :param ctx: Context + :param config: Configuration + """ + log.info("Setting up smbd with ceph vfs...") + assert config is None or isinstance(config, list) or isinstance(config, dict), \ + "task samba got invalid config" + + if config is None: + config = dict(('samba.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + samba_servers = list(get_sambas(ctx=ctx, roles=config.keys())) + + testdir = teuthology.get_testdir(ctx) + + if not hasattr(ctx, 'daemons'): + ctx.daemons = DaemonGroup() + + for id_, remote in samba_servers: + + rolestr = "samba.{id_}".format(id_=id_) + + confextras = """vfs objects = ceph + ceph:config_file = /etc/ceph/ceph.conf""" + + unc = "ceph" + backend = "/" + + if config[rolestr] is not None: + # verify that there's just one parameter in role + if len(config[rolestr]) != 1: + log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_)) + raise Exception('invalid config') + confextras = "" + (unc, backendstr) = config[rolestr].items()[0] + backend = backendstr.format(testdir=testdir) + + # on first samba role, set ownership and permissions of ceph root + # so that samba tests succeed + if config[rolestr] is None and id_ == samba_servers[0][0]: + remote.run( + args=[ + 'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'), + 'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'), + 'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'), + 'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'), + 'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'), + 'rm', '-rf', '/tmp/cmnt', + ], + ) + else: + remote.run( + args=[ + 'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'), + 'sudo', 'chmod', '1777', backend, + ], + ) + + remote.sudo_write_file("/usr/local/samba/etc/smb.conf", """ +[global] + workgroup = WORKGROUP + netbios name = DOMAIN + +[{unc}] + path = {backend} + {extras} + writeable = yes + valid users = ubuntu +""".format(extras=confextras, unc=unc, backend=backend)) + + # create ubuntu user + remote.run( + args=[ + 'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu', + run.Raw('||'), + 'printf', run.Raw('"ubuntu\nubuntu\n"'), + run.Raw('|'), + 'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu' + ]) + + smbd_cmd = [ + 'sudo', + 'daemon-helper', + 'term', + 'nostdin', + '/usr/local/samba/sbin/smbd', + '-F', + ] + ctx.daemons.add_daemon(remote, 'smbd', id_, + args=smbd_cmd, + logger=log.getChild("smbd.{id_}".format(id_=id_)), + stdin=run.PIPE, + wait=False, + ) + + # let smbd initialize, probably a better way... + seconds_to_sleep = 100 + log.info('Sleeping for %s seconds...' 
% seconds_to_sleep) + time.sleep(seconds_to_sleep) + log.info('Sleeping stopped...') + + try: + yield + finally: + log.info('Stopping smbd processes...') + exc = None + for d in ctx.daemons.iter_daemons_of_role('smbd'): + try: + d.stop() + except (run.CommandFailedError, + run.CommandCrashedError, + run.ConnectionLostError) as e: + exc = e + log.exception('Saw exception from %s.%s', d.role, d.id_) + if exc is not None: + raise exc + + for id_, remote in samba_servers: + remote.run( + args=[ + 'sudo', + 'rm', '-rf', + '/usr/local/samba/etc/smb.conf', + '/usr/local/samba/private/*', + '/usr/local/samba/var/run/', + '/usr/local/samba/var/locks', + '/usr/local/samba/var/lock', + ], + ) + # make sure daemons are gone + try: + remote.run( + args=[ + 'while', + 'sudo', 'killall', '-9', 'smbd', + run.Raw(';'), + 'do', 'sleep', '1', + run.Raw(';'), + 'done', + ], + ) + + remote.run( + args=[ + 'sudo', + 'lsof', + backend, + ], + check_status=False + ) + remote.run( + args=[ + 'sudo', + 'fuser', + '-M', + backend, + ], + check_status=False + ) + except Exception: + log.exception("Saw exception") + pass diff --git a/qa/tasks/scrub.py b/qa/tasks/scrub.py new file mode 100644 index 000000000..ddc1a9164 --- /dev/null +++ b/qa/tasks/scrub.py @@ -0,0 +1,117 @@ +""" +Scrub osds +""" +import contextlib +import gevent +import logging +import random +import time + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run scrub periodically. Randomly chooses an OSD to scrub. + + The config should be as follows: + + scrub: + frequency: <seconds between scrubs> + deep: <bool for deepness> + + example: + + tasks: + - ceph: + - scrub: + frequency: 30 + deep: 0 + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub task only accepts a dict for configuration' + + log.info('Beginning scrub...') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + scrub_proc = Scrubber( + manager, + config, + ) + try: + yield + finally: + log.info('joining scrub') + scrub_proc.do_join() + +class Scrubber: + """ + Scrubbing is actually performed during initialization + """ + def __init__(self, manager, config): + """ + Spawn scrubbing thread upon completion. 
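        The background thread picks a random 'up' OSD every `frequency` seconds
        and issues the equivalent of

            ceph osd scrub <osd-id>      # or 'osd deep-scrub' when deep is set

        through the cluster manager, until do_join() stops it.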
+ """ + self.ceph_manager = manager + self.ceph_manager.wait_for_clean() + + osd_status = self.ceph_manager.get_osd_status() + self.osds = osd_status['up'] + + self.config = config + if self.config is None: + self.config = dict() + + else: + def tmp(x): + """Local display""" + print(x) + self.log = tmp + + self.stopping = False + + log.info("spawning thread") + + self.thread = gevent.spawn(self.do_scrub) + + def do_join(self): + """Scrubbing thread finished""" + self.stopping = True + self.thread.get() + + def do_scrub(self): + """Perform the scrub operation""" + frequency = self.config.get("frequency", 30) + deep = self.config.get("deep", 0) + + log.info("stopping %s" % self.stopping) + + while not self.stopping: + osd = str(random.choice(self.osds)) + + if deep: + cmd = 'deep-scrub' + else: + cmd = 'scrub' + + log.info('%sbing %s' % (cmd, osd)) + self.ceph_manager.raw_cluster_cmd('osd', cmd, osd) + + time.sleep(frequency) diff --git a/qa/tasks/scrub_test.py b/qa/tasks/scrub_test.py new file mode 100644 index 000000000..edf106952 --- /dev/null +++ b/qa/tasks/scrub_test.py @@ -0,0 +1,413 @@ +"""Scrub testing""" + +import contextlib +import json +import logging +import os +import time +import tempfile + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + + +def wait_for_victim_pg(manager, poolid): + """Return a PG with some data and its acting set""" + # wait for some PG to have data that we can mess with + victim = None + while victim is None: + stats = manager.get_pg_stats() + for pg in stats: + pgid = str(pg['pgid']) + pgpool = int(pgid.split('.')[0]) + if poolid != pgpool: + continue + size = pg['stat_sum']['num_bytes'] + if size > 0: + victim = pg['pgid'] + acting = pg['acting'] + return victim, acting + time.sleep(3) + + +def find_victim_object(ctx, pg, osd): + """Return a file to be fuzzed""" + (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.keys() + data_path = os.path.join( + '/var/lib/ceph/osd', + 'ceph-{id}'.format(id=osd), + 'fuse', + '{pg}_head'.format(pg=pg), + 'all', + ) + + # fuzz time + ls_out = osd_remote.sh('sudo ls %s' % data_path) + + # find an object file we can mess with (and not the pg info object) + osdfilename = next(line for line in ls_out.split('\n') + if not line.endswith('::::head#')) + assert osdfilename is not None + + # Get actual object name from osd stored filename + objname = osdfilename.split(':')[4] + return osd_remote, os.path.join(data_path, osdfilename), objname + + +def corrupt_file(osd_remote, path): + # put a single \0 at the beginning of the file + osd_remote.run( + args=['sudo', 'dd', + 'if=/dev/zero', + 'of=%s/data' % path, + 'bs=1', 'count=1', 'conv=notrunc'] + ) + + +def get_pgnum(pgid): + pos = pgid.find('.') + assert pos != -1 + return pgid[pos+1:] + + +def deep_scrub(manager, victim, pool): + # scrub, verify inconsistent + pgnum = get_pgnum(victim) + manager.do_pg_scrub(pool, pgnum, 'deep-scrub') + + stats = manager.get_single_pg_stats(victim) + inconsistent = stats['state'].find('+inconsistent') != -1 + assert inconsistent + + +def repair(manager, victim, pool): + # repair, verify no longer inconsistent + pgnum = get_pgnum(victim) + manager.do_pg_scrub(pool, pgnum, 'repair') + + stats = manager.get_single_pg_stats(victim) + inconsistent = stats['state'].find('+inconsistent') != -1 + assert not inconsistent + + +def test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, pool): + corrupt_file(osd_remote, obj_path) + deep_scrub(manager, pg, pool) + 
repair(manager, pg, pool) + + +def test_repair_bad_omap(ctx, manager, pg, osd, objname): + # Test deep-scrub with various omap modifications + # Modify omap on specific osd + log.info('fuzzing omap of %s' % objname) + manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key']) + manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, + 'badkey', 'badval']) + manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr']) + + deep_scrub(manager, pg, 'rbd') + # please note, the repair here is errnomous, it rewrites the correct omap + # digest and data digest on the replicas with the corresponding digests + # from the primary osd which is hosting the victim object, see + # find_victim_object(). + # so we need to either put this test and the end of this task or + # undo the mess-up manually before the "repair()" that just ensures + # the cleanup is sane, otherwise the succeeding tests will fail. if they + # try set "badkey" in hope to get an "inconsistent" pg with a deep-scrub. + manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'hdr']) + manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'badkey']) + manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, + 'key', 'val']) + repair(manager, pg, 'rbd') + + +class MessUp: + def __init__(self, manager, osd_remote, pool, osd_id, + obj_name, obj_path, omap_key, omap_val): + self.manager = manager + self.osd = osd_remote + self.pool = pool + self.osd_id = osd_id + self.obj = obj_name + self.path = obj_path + self.omap_key = omap_key + self.omap_val = omap_val + + @contextlib.contextmanager + def _test_with_file(self, messup_cmd, *checks): + temp = tempfile.mktemp() + backup_cmd = ['sudo', 'cp', os.path.join(self.path, 'data'), temp] + self.osd.run(args=backup_cmd) + self.osd.run(args=messup_cmd.split()) + yield checks + create_cmd = ['sudo', 'mkdir', self.path] + self.osd.run(args=create_cmd, check_status=False) + restore_cmd = ['sudo', 'cp', temp, os.path.join(self.path, 'data')] + self.osd.run(args=restore_cmd) + + def remove(self): + cmd = 'sudo rmdir {path}'.format(path=self.path) + return self._test_with_file(cmd, 'missing') + + def append(self): + cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \ + 'conv=notrunc oflag=append'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch', + 'size_mismatch') + + def truncate(self): + cmd = 'sudo dd if=/dev/null of={path}/data'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch', + 'size_mismatch') + + def change_obj(self): + cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \ + 'conv=notrunc'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch') + + @contextlib.contextmanager + def rm_omap(self): + cmd = ['rmomapkey', self.pool, self.obj, self.omap_key] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield ('omap_digest_mismatch',) + cmd = ['setomapval', self.pool, self.obj, + self.omap_key, self.omap_val] + self.manager.osd_admin_socket(self.osd_id, cmd) + + @contextlib.contextmanager + def add_omap(self): + cmd = ['setomapval', self.pool, self.obj, 'badkey', 'badval'] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield ('omap_digest_mismatch',) + cmd = ['rmomapkey', self.pool, self.obj, 'badkey'] + self.manager.osd_admin_socket(self.osd_id, cmd) + + @contextlib.contextmanager + def change_omap(self): + cmd = ['setomapval', self.pool, self.obj, self.omap_key, 'badval'] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield 
('omap_digest_mismatch',) + cmd = ['setomapval', self.pool, self.obj, self.omap_key, self.omap_val] + self.manager.osd_admin_socket(self.osd_id, cmd) + + +class InconsistentObjChecker: + """Check the returned inconsistents/inconsistent info""" + + def __init__(self, osd, acting, obj_name): + self.osd = osd + self.acting = acting + self.obj = obj_name + assert self.osd in self.acting + + def basic_checks(self, inc): + assert inc['object']['name'] == self.obj + assert inc['object']['snap'] == "head" + assert len(inc['shards']) == len(self.acting), \ + "the number of returned shard does not match with the acting set" + + def run(self, check, inc): + func = getattr(self, check) + func(inc) + + def _check_errors(self, inc, err_name): + bad_found = False + good_found = False + for shard in inc['shards']: + log.info('shard = %r' % shard) + log.info('err = %s' % err_name) + assert 'osd' in shard + osd = shard['osd'] + err = err_name in shard['errors'] + if osd == self.osd: + assert bad_found is False, \ + "multiple entries found for the given OSD" + assert err is True, \ + "Didn't find '{err}' in errors".format(err=err_name) + bad_found = True + else: + assert osd in self.acting, "shard not in acting set" + assert err is False, \ + "Expected '{err}' in errors".format(err=err_name) + good_found = True + assert bad_found is True, \ + "Shard for osd.{osd} not found".format(osd=self.osd) + assert good_found is True, \ + "No other acting shards found" + + def _check_attrs(self, inc, attr_name): + bad_attr = None + good_attr = None + for shard in inc['shards']: + log.info('shard = %r' % shard) + log.info('attr = %s' % attr_name) + assert 'osd' in shard + osd = shard['osd'] + attr = shard.get(attr_name, False) + if osd == self.osd: + assert bad_attr is None, \ + "multiple entries found for the given OSD" + bad_attr = attr + else: + assert osd in self.acting, "shard not in acting set" + assert good_attr is None or good_attr == attr, \ + "multiple good attrs found" + good_attr = attr + assert bad_attr is not None, \ + "bad {attr} not found".format(attr=attr_name) + assert good_attr is not None, \ + "good {attr} not found".format(attr=attr_name) + assert good_attr != bad_attr, \ + "bad attr is identical to the good ones: " \ + "{0} == {1}".format(good_attr, bad_attr) + + def data_digest_mismatch(self, inc): + assert 'data_digest_mismatch' in inc['errors'] + self._check_attrs(inc, 'data_digest') + + def missing(self, inc): + assert 'missing' in inc['union_shard_errors'] + self._check_errors(inc, 'missing') + + def size_mismatch(self, inc): + assert 'size_mismatch' in inc['errors'] + self._check_attrs(inc, 'size') + + def omap_digest_mismatch(self, inc): + assert 'omap_digest_mismatch' in inc['errors'] + self._check_attrs(inc, 'omap_digest') + + +def test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd_id, + obj_name, obj_path): + mon = manager.controller + pool = 'rbd' + omap_key = 'key' + omap_val = 'val' + manager.do_rados(['setomapval', obj_name, omap_key, omap_val], pool=pool) + # Update missing digests, requires "osd deep scrub update digest min age: 0" + pgnum = get_pgnum(pg) + manager.do_pg_scrub(pool, pgnum, 'deep-scrub') + + messup = MessUp(manager, osd_remote, pool, osd_id, obj_name, obj_path, + omap_key, omap_val) + for test in [messup.rm_omap, messup.add_omap, messup.change_omap, + messup.append, messup.truncate, messup.change_obj, + messup.remove]: + with test() as checks: + deep_scrub(manager, pg, pool) + cmd = 'rados list-inconsistent-pg {pool} ' \ + 
'--format=json'.format(pool=pool) + pgs = json.loads(mon.sh(cmd)) + assert pgs == [pg] + + cmd = 'rados list-inconsistent-obj {pg} ' \ + '--format=json'.format(pg=pg) + objs = json.loads(mon.sh(cmd)) + assert len(objs['inconsistents']) == 1 + + checker = InconsistentObjChecker(osd_id, acting, obj_name) + inc_obj = objs['inconsistents'][0] + log.info('inc = %r', inc_obj) + checker.basic_checks(inc_obj) + for check in checks: + checker.run(check, inc_obj) + + +def task(ctx, config): + """ + Test [deep] scrub + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: + - '!= data_digest' + - '!= omap_digest' + - '!= size' + - deep-scrub 0 missing, 1 inconsistent objects + - deep-scrub [0-9]+ errors + - repair 0 missing, 1 inconsistent objects + - repair [0-9]+ errors, [0-9]+ fixed + - shard [0-9]+ .* : missing + - deep-scrub 1 missing, 1 inconsistent objects + - does not match object info size + - attr name mistmatch + - deep-scrub 1 missing, 0 inconsistent objects + - failed to pick suitable auth object + - candidate size [0-9]+ info size [0-9]+ mismatch + conf: + osd: + osd deep scrub update digest min age: 0 + - scrub_test: + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub_test task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + for i in range(num_osds): + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', + '--', '--osd-objectstore-fuse') + manager.flush_pg_stats(range(num_osds)) + manager.wait_for_clean() + + osd_dump = manager.get_osd_dump_json() + poolid = -1 + for p in osd_dump['pools']: + if p['pool_name'] == 'rbd': + poolid = p['pool'] + break + assert poolid != -1 + + # write some data + p = manager.do_rados(['bench', '--no-cleanup', '1', 'write', '-b', '4096'], pool='rbd') + log.info('err is %d' % p.exitstatus) + + # wait for some PG to have data that we can mess with + pg, acting = wait_for_victim_pg(manager, poolid) + osd = acting[0] + + osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd) + manager.do_rados(['setomapval', obj_name, 'key', 'val'], pool='rbd') + log.info('err is %d' % p.exitstatus) + manager.do_rados(['setomapheader', obj_name, 'hdr'], pool='rbd') + log.info('err is %d' % p.exitstatus) + + # Update missing digests, requires "osd deep scrub update digest min age: 0" + pgnum = get_pgnum(pg) + manager.do_pg_scrub('rbd', pgnum, 'deep-scrub') + + log.info('messing with PG %s on osd %d' % (pg, osd)) + test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd') + test_repair_bad_omap(ctx, manager, pg, osd, obj_name) + test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd, + obj_name, obj_path) + log.info('test successful!') + + # shut down fuse mount + for i in range(num_osds): + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', + '--', '--no-osd-objectstore-fuse') + time.sleep(5) + log.info('done') diff --git a/qa/tasks/systemd.py b/qa/tasks/systemd.py new file mode 100644 index 000000000..1728b920f --- /dev/null +++ b/qa/tasks/systemd.py @@ -0,0 +1,135 @@ +""" +Systemd test +""" +import contextlib +import logging +import re +import time + +from teuthology.orchestra import run +from 
teuthology.misc import reconnect, get_first_mon, wait_until_healthy + +log = logging.getLogger(__name__) + +def _remote_service_status(remote, service): + status = remote.sh('sudo systemctl status %s' % service, + check_status=False) + return status + +@contextlib.contextmanager +def task(ctx, config): + """ + - tasks: + ceph-deploy: + systemd: + + Test ceph systemd services can start, stop and restart and + check for any failed services and report back errors + """ + for remote, roles in ctx.cluster.remotes.items(): + remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + units = remote.sh('sudo systemctl list-units | grep ceph', + check_status=False) + log.info(units) + if units.find('failed'): + log.info("Ceph services in failed state") + + # test overall service stop and start using ceph.target + # ceph.target tests are meant for ceph systemd tests + # and not actual process testing using 'ps' + log.info("Stopping all Ceph services") + remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) + status = _remote_service_status(remote, 'ceph.target') + log.info(status) + log.info("Checking process status") + ps_eaf = remote.sh('sudo ps -eaf | grep ceph') + if ps_eaf.find('Active: inactive'): + log.info("Successfully stopped all ceph services") + else: + log.info("Failed to stop ceph services") + + log.info("Starting all Ceph services") + remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target']) + status = _remote_service_status(remote, 'ceph.target') + log.info(status) + if status.find('Active: active'): + log.info("Successfully started all Ceph services") + else: + log.info("info", "Failed to start Ceph services") + ps_eaf = remote.sh('sudo ps -eaf | grep ceph') + log.info(ps_eaf) + time.sleep(4) + + # test individual services start stop + name = remote.shortname + mon_name = 'ceph-mon@' + name + '.service' + mds_name = 'ceph-mds@' + name + '.service' + mgr_name = 'ceph-mgr@' + name + '.service' + mon_role_name = 'mon.' + name + mds_role_name = 'mds.' + name + mgr_role_name = 'mgr.' 
+ name + m_osd = re.search('--id (\d+) --setuser ceph', ps_eaf) + if m_osd: + osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1)) + remote.run(args=['sudo', 'systemctl', 'status', + osd_service]) + remote.run(args=['sudo', 'systemctl', 'stop', + osd_service]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, osd_service) + log.info(status) + if status.find('Active: inactive'): + log.info("Successfully stopped single osd ceph service") + else: + log.info("Failed to stop ceph osd services") + remote.sh(['sudo', 'systemctl', 'start', osd_service]) + time.sleep(4) + if mon_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mon_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mon_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mon_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single mon ceph service") + else: + log.info("Failed to stop ceph mon service") + remote.run(args=['sudo', 'systemctl', 'start', mon_name]) + time.sleep(4) + if mgr_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mgr_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mgr_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mgr_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single ceph mgr service") + else: + log.info("Failed to stop ceph mgr service") + remote.run(args=['sudo', 'systemctl', 'start', mgr_name]) + time.sleep(4) + if mds_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mds_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mds_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mds_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single ceph mds service") + else: + log.info("Failed to stop ceph mds service") + remote.run(args=['sudo', 'systemctl', 'start', mds_name]) + time.sleep(4) + + # reboot all nodes and verify the systemd units restart + # workunit that runs would fail if any of the systemd unit doesnt start + ctx.cluster.run(args='sudo reboot', wait=False, check_status=False) + # avoid immediate reconnect + time.sleep(120) + reconnect(ctx, 480) # reconnect all nodes + # for debug info + ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + # wait for HEALTH_OK + mon = get_first_mon(ctx, config) + (mon_remote,) = ctx.cluster.only(mon).remotes.keys() + wait_until_healthy(ctx, mon_remote, use_sudo=True) + yield diff --git a/qa/tasks/tempest.py b/qa/tasks/tempest.py new file mode 100644 index 000000000..142c097cd --- /dev/null +++ b/qa/tasks/tempest.py @@ -0,0 +1,263 @@ +""" +Deploy and configure Tempest for Teuthology +""" +import configparser +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.exceptions import ConfigError +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_tempest_dir(ctx): + return '{tdir}/tempest'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_tempest_dir(ctx, client, cmdargs, **kwargs): + ctx.cluster.only(client).run( + args=[ 'cd', get_tempest_dir(ctx), run.Raw('&&'), ] + cmdargs, + **kwargs + ) + +def run_in_tempest_rgw_dir(ctx, client, cmdargs, **kwargs): + ctx.cluster.only(client).run( + args=[ 'cd', 
get_tempest_dir(ctx) + '/rgw', run.Raw('&&'), ] + cmdargs, + **kwargs + ) + +def run_in_tempest_venv(ctx, client, cmdargs, **kwargs): + run_in_tempest_dir(ctx, client, + [ 'source', + '.tox/venv/bin/activate', + run.Raw('&&') + ] + cmdargs, **kwargs) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Tempest from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading Tempest...') + for (client, cconf) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', cconf.get('force-branch', 'master'), + 'https://github.com/openstack/tempest.git', + get_tempest_dir(ctx) + ], + ) + + sha1 = cconf.get('sha1') + if sha1 is not None: + run_in_tempest_dir(ctx, client, [ 'git', 'reset', '--hard', sha1 ]) + try: + yield + finally: + log.info('Removing Tempest...') + for client in config: + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', get_tempest_dir(ctx) ], + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Tempest using tox. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for Tempest') + for (client, _) in config.items(): + run_in_tempest_dir(ctx, client, + [ '{tvdir}/bin/tox'.format(tvdir=get_toxvenv_dir(ctx)), + '-e', 'venv', '--notest' + ]) + yield + +def setup_logging(ctx, cpar): + cpar.set('DEFAULT', 'log_dir', teuthology.get_archive_dir(ctx)) + cpar.set('DEFAULT', 'log_file', 'tempest.log') + +def to_config(config, params, section, cpar): + for (k, v) in config[section].items(): + if isinstance(v, str): + v = v.format(**params) + elif isinstance(v, bool): + v = 'true' if v else 'false' + else: + v = str(v) + cpar.set(section, k, v) + +@contextlib.contextmanager +def configure_instance(ctx, config): + assert isinstance(config, dict) + log.info('Configuring Tempest') + + for (client, cconfig) in config.items(): + run_in_tempest_venv(ctx, client, + [ + 'tempest', + 'init', + '--workspace-path', + get_tempest_dir(ctx) + '/workspace.yaml', + 'rgw' + ]) + + # prepare the config file + tetcdir = '{tdir}/rgw/etc'.format(tdir=get_tempest_dir(ctx)) + (remote,) = ctx.cluster.only(client).remotes.keys() + local_conf = remote.get_file(tetcdir + '/tempest.conf.sample') + + # fill the params dictionary which allows to use templatized configs + keystone_role = cconfig.get('use-keystone-role', None) + if keystone_role is None \ + or keystone_role not in ctx.keystone.public_endpoints: + raise ConfigError('the use-keystone-role is misconfigured') + public_host, public_port = ctx.keystone.public_endpoints[keystone_role] + params = { + 'keystone_public_host': public_host, + 'keystone_public_port': str(public_port), + } + + cpar = configparser.ConfigParser() + cpar.read(local_conf) + setup_logging(ctx, cpar) + to_config(cconfig, params, 'auth', cpar) + to_config(cconfig, params, 'identity', cpar) + to_config(cconfig, params, 'object-storage', cpar) + to_config(cconfig, params, 'object-storage-feature-enabled', cpar) + cpar.write(open(local_conf, 'w+')) + + remote.put_file(local_conf, tetcdir + '/tempest.conf') + yield + +@contextlib.contextmanager +def run_tempest(ctx, config): + assert isinstance(config, dict) + log.info('Configuring Tempest') + + for (client, cconf) in config.items(): + blocklist = cconf.get('blocklist', []) + assert isinstance(blocklist, list) + run_in_tempest_venv(ctx, 
client, + [ + 'tempest', + 'run', + '--workspace-path', + get_tempest_dir(ctx) + '/workspace.yaml', + '--workspace', + 'rgw', + '--regex', '^tempest.api.object_storage', + '--black-regex', '|'.join(blocklist) + ]) + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and run Tempest's object storage campaign + + Example of configuration: + + overrides: + ceph: + conf: + client: + rgw keystone api version: 3 + rgw keystone accepted roles: admin,member + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin domain: Default + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin project: admin + tasks: + # typically, the task should be preceded with install, ceph, tox, + # keystone and rgw. Tox and Keystone are specific requirements + # of tempest.py. + - rgw: + # it's important to match the prefix with the endpoint's URL + # in Keystone. Additionally, if we want to test /info and its + # accompanying stuff, the whole Swift API must be put in root + # of the whole URL hierarchy (read: frontend_prefix == /swift). + frontend_prefix: /swift + client.0: + use-keystone-role: client.0 + - tempest: + client.0: + force-branch: master + use-keystone-role: client.0 + auth: + admin_username: admin + admin_project_name: admin + admin_password: ADMIN + admin_domain_name: Default + identity: + uri: http://{keystone_public_host}:{keystone_public_port}/v2.0/ + uri_v3: http://{keystone_public_host}:{keystone_public_port}/v3/ + admin_role: admin + object-storage: + reseller_admin_role: admin + object-storage-feature-enabled: + container_sync: false + discoverability: false + blocklist: + # please strip half of these items after merging PRs #15369 + # and #12704 + - .*test_list_containers_reverse_order.* + - .*test_list_container_contents_with_end_marker.* + - .*test_delete_non_empty_container.* + - .*test_container_synchronization.* + - .*test_get_object_after_expiration_time.* + - .*test_create_object_with_transfer_encoding.* + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + 'task tempest only supports a list or dictionary for configuration' + + if not ctx.tox: + raise ConfigError('tempest must run after the tox task') + if not ctx.keystone: + raise ConfigError('tempest must run after the keystone task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. 
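+    # Illustrative (hypothetical) values: with
+    #     overrides: {'keystone': {'force-branch': 'stable'}}
+    # and config == {'client.0': {'use-keystone-role': 'client.0'}},
+    # the deep_merge() below keeps the per-client keys and only fills in
+    # settings that the client section did not specify itself.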
+ for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('keystone', {})) + + log.debug('Tempest config is %s', config) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_instance(ctx=ctx, config=config), + lambda: run_tempest(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/tests/__init__.py b/qa/tasks/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/tests/__init__.py diff --git a/qa/tasks/tests/conftest.py b/qa/tasks/tests/conftest.py new file mode 100644 index 000000000..7cc617a41 --- /dev/null +++ b/qa/tasks/tests/conftest.py @@ -0,0 +1,12 @@ +import glob + + +def pytest_addoption(parser): + parser.addoption("--suite-dir", help="suite dir") + +def pytest_generate_tests(metafunc): + if "yaml_file" in metafunc.fixturenames: + suite_dir = metafunc.config.getoption("--suite-dir") + files = glob.glob(f"{suite_dir}/**/*.yaml", recursive=True) + + metafunc.parametrize("yaml_file", list(set(files))) diff --git a/qa/tasks/tests/test_devstack.py b/qa/tasks/tests/test_devstack.py new file mode 100644 index 000000000..39b94a64c --- /dev/null +++ b/qa/tasks/tests/test_devstack.py @@ -0,0 +1,48 @@ +from textwrap import dedent + +from tasks import devstack + + +class TestDevstack(object): + def test_parse_os_table(self): + table_str = dedent(""" + +---------------------+--------------------------------------+ + | Property | Value | + +---------------------+--------------------------------------+ + | attachments | [] | + | availability_zone | nova | + | bootable | false | + | created_at | 2014-02-21T17:14:47.548361 | + | display_description | None | + | display_name | NAME | + | id | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e | + | metadata | {} | + | size | 1 | + | snapshot_id | None | + | source_volid | None | + | status | creating | + | volume_type | None | + +---------------------+--------------------------------------+ + """).strip() + expected = { + 'Property': 'Value', + 'attachments': '[]', + 'availability_zone': 'nova', + 'bootable': 'false', + 'created_at': '2014-02-21T17:14:47.548361', + 'display_description': 'None', + 'display_name': 'NAME', + 'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e', + 'metadata': '{}', + 'size': '1', + 'snapshot_id': 'None', + 'source_volid': 'None', + 'status': 'creating', + 'volume_type': 'None'} + + vol_info = devstack.parse_os_table(table_str) + assert vol_info == expected + + + + diff --git a/qa/tasks/tests/test_import_yaml.py b/qa/tasks/tests/test_import_yaml.py new file mode 100644 index 000000000..d6e0e2640 --- /dev/null +++ b/qa/tasks/tests/test_import_yaml.py @@ -0,0 +1,5 @@ +import yaml + + +def test_load_yaml(yaml_file): + yaml.safe_load(open(yaml_file)) diff --git a/qa/tasks/tests/test_radosgw_admin.py b/qa/tasks/tests/test_radosgw_admin.py new file mode 100644 index 000000000..8506eda7b --- /dev/null +++ b/qa/tasks/tests/test_radosgw_admin.py @@ -0,0 +1,31 @@ +from unittest.mock import Mock + +from tasks import radosgw_admin + +acl_with_version = b"""<?xml version="1.0" encoding="UTF-8"?><AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy> +""" # noqa + + +acl_without_version = b"""<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy> +""" # noqa + + +class TestGetAcl(object): + + def setup(self): + self.key = Mock() + + def test_removes_xml_version(self): + self.key.get_xml_acl = Mock(return_value=acl_with_version) + result = radosgw_admin.get_acl(self.key) + assert result.startswith('<AccessControlPolicy') + + def test_xml_version_is_already_removed(self): + self.key.get_xml_acl = Mock(return_value=acl_without_version) + result = radosgw_admin.get_acl(self.key) + assert result.startswith('<AccessControlPolicy') + + def test_newline_gets_trimmed(self): + self.key.get_xml_acl = Mock(return_value=acl_without_version) + result = radosgw_admin.get_acl(self.key) + assert result.endswith('\n') is False diff --git a/qa/tasks/teuthology_integration.py b/qa/tasks/teuthology_integration.py new file mode 100644 index 000000000..b5a2278eb --- /dev/null +++ b/qa/tasks/teuthology_integration.py @@ -0,0 +1,19 @@ +import logging +from teuthology import misc +from teuthology.task import Task + +log = logging.getLogger(__name__) + + +class TeuthologyIntegration(Task): + + def begin(self): + misc.sh(""" + set -x + pip install tox + tox + # tox -e py27-integration + tox -e openstack-integration + """) + +task = TeuthologyIntegration diff --git a/qa/tasks/tgt.py b/qa/tasks/tgt.py new file mode 100644 index 000000000..a0758f472 --- /dev/null +++ b/qa/tasks/tgt.py @@ -0,0 +1,177 @@ +""" +Task to handle tgt + +Assumptions made: + The ceph-extras tgt package may need to get installed. + The open-iscsi package needs to get installed. 
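+
+In outline, for each selected client the task creates an rbd image named
+'iscsi-image', exports it through tgtd as LUN 1 of an iSCSI target named
+'rbd', and tears both down again on exit. The initiator side is handled by
+the separate iscsi task.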
+""" +import logging +import contextlib + +from teuthology import misc as teuthology +from teuthology import contextutil + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def start_tgt_remotes(ctx, start_tgtd): + """ + This subtask starts up a tgtd on the clients specified + """ + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + tgtd_list = [] + for rem, roles in remotes.items(): + for _id in roles: + if _id in start_tgtd: + if not rem in tgtd_list: + tgtd_list.append(rem) + size = ctx.config.get('image_size', 10240) + rem.run( + args=[ + 'rbd', + 'create', + 'iscsi-image', + '--size', + str(size), + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'target', + '--op', + 'new', + '--tid', + '1', + '--targetname', + 'rbd', + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'logicalunit', + '--op', + 'new', + '--tid', + '1', + '--lun', + '1', + '--backing-store', + 'iscsi-image', + '--bstype', + 'rbd', + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--op', + 'bind', + '--mode', + 'target', + '--tid', + '1', + '-I', + 'ALL', + ]) + try: + yield + + finally: + for rem in tgtd_list: + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'target', + '--op', + 'delete', + '--force', + '--tid', + '1', + ]) + rem.run( + args=[ + 'rbd', + 'snap', + 'purge', + 'iscsi-image', + ]) + rem.run( + args=[ + 'sudo', + 'rbd', + 'rm', + 'iscsi-image', + ]) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Start up tgt. + + To start on on all clients:: + + tasks: + - ceph: + - tgt: + + To start on certain clients:: + + tasks: + - ceph: + - tgt: [client.0, client.3] + + or + + tasks: + - ceph: + - tgt: + client.0: + client.3: + + An image blocksize size can also be specified:: + + tasks: + - ceph: + - tgt: + image_size = 20480 + + The general flow of things here is: + 1. Find clients on which tgt is supposed to run (start_tgtd) + 2. Remotely start up tgt daemon + On cleanup: + 3. Stop tgt daemon + + The iscsi administration is handled by the iscsi task. + """ + if config: + config = {key : val for key, val in config.items() + if key.startswith('client')} + # config at this point should only contain keys starting with 'client' + start_tgtd = [] + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + log.info(remotes) + if not config: + start_tgtd = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + else: + start_tgtd = config + log.info(start_tgtd) + with contextutil.nested( + lambda: start_tgt_remotes(ctx=ctx, start_tgtd=start_tgtd),): + yield diff --git a/qa/tasks/thrash_pool_snaps.py b/qa/tasks/thrash_pool_snaps.py new file mode 100644 index 000000000..c71c9ce8d --- /dev/null +++ b/qa/tasks/thrash_pool_snaps.py @@ -0,0 +1,61 @@ +""" +Thrash -- Simulate random osd failures. 
+""" +import contextlib +import logging +import gevent +import time +import random + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + "Thrash" snap creation and removal on the listed pools + + Example: + + thrash_pool_snaps: + pools: [.rgw.buckets, .rgw.buckets.index] + max_snaps: 10 + min_snaps: 5 + period: 10 + """ + stopping = False + def do_thrash(): + pools = config.get('pools', []) + max_snaps = config.get('max_snaps', 10) + min_snaps = config.get('min_snaps', 5) + period = config.get('period', 30) + snaps = [] + manager = ctx.managers['ceph'] + def remove_snap(): + assert len(snaps) > 0 + snap = random.choice(snaps) + log.info("Removing snap %s" % (snap,)) + for pool in pools: + manager.remove_pool_snap(pool, str(snap)) + snaps.remove(snap) + def add_snap(snap): + log.info("Adding snap %s" % (snap,)) + for pool in pools: + manager.add_pool_snap(pool, str(snap)) + snaps.append(snap) + index = 0 + while not stopping: + index += 1 + time.sleep(period) + if len(snaps) <= min_snaps: + add_snap(index) + elif len(snaps) >= max_snaps: + remove_snap() + else: + random.choice([lambda: add_snap(index), remove_snap])() + log.info("Stopping") + thread = gevent.spawn(do_thrash) + yield + stopping = True + thread.join() + diff --git a/qa/tasks/thrasher.py b/qa/tasks/thrasher.py new file mode 100644 index 000000000..0ea1bf0ee --- /dev/null +++ b/qa/tasks/thrasher.py @@ -0,0 +1,15 @@ +""" +Thrasher base class +""" +class Thrasher(object): + + def __init__(self): + super(Thrasher, self).__init__() + self._exception = None + + @property + def exception(self): + return self._exception + + def set_thrasher_exception(self, e): + self._exception = e diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml new file mode 100644 index 000000000..1405f4740 --- /dev/null +++ b/qa/tasks/thrashosds-health.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + osd: + osd max markdown count: 1000 + osd blocked scrub grace period: 3600 + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - timeout on replica + - late reservation from diff --git a/qa/tasks/thrashosds.py b/qa/tasks/thrashosds.py new file mode 100644 index 000000000..aa7ec437a --- /dev/null +++ b/qa/tasks/thrashosds.py @@ -0,0 +1,221 @@ +""" +Thrash -- Simulate random osd failures. +""" +import contextlib +import logging +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + "Thrash" the OSDs by randomly marking them out/down (and then back + in) until the task is ended. This loops, and every op_delay + seconds it randomly chooses to add or remove an OSD (even odds) + unless there are fewer than min_out OSDs out of the cluster, or + more than min_in OSDs in the cluster. + + All commands are run on mon0 and it stops when __exit__ is called. + + The config is optional, and is a dict containing some or all of: + + cluster: (default 'ceph') the name of the cluster to thrash + + min_in: (default 4) the minimum number of OSDs to keep in the + cluster + + min_out: (default 0) the minimum number of OSDs to keep out of the + cluster + + op_delay: (5) the length of time to sleep between changing an + OSD's status + + min_dead: (0) minimum number of osds to leave down/dead. 
+ + max_dead: (0) maximum number of osds to leave down/dead before waiting + for clean. This should probably be num_replicas - 1. + + clean_interval: (60) the approximate length of time to loop before + waiting until the cluster goes clean. (In reality this is used + to probabilistically choose when to wait, and the method used + makes it closer to -- but not identical to -- the half-life.) + + scrub_interval: (-1) the approximate length of time to loop before + waiting until a scrub is performed while cleaning. (In reality + this is used to probabilistically choose when to wait, and it + only applies to the cases where cleaning is being performed). + -1 is used to indicate that no scrubbing will be done. + + chance_down: (0.4) the probability that the thrasher will mark an + OSD down rather than marking it out. (The thrasher will not + consider that OSD out of the cluster, since presently an OSD + wrongly marked down will mark itself back up again.) This value + can be either an integer (eg, 75) or a float probability (eg + 0.75). + + chance_test_min_size: (0) chance to run test_pool_min_size, + which: + - kills all but one osd + - waits + - kills that osd + - revives all other osds + - verifies that the osds fully recover + + timeout: (360) the number of seconds to wait for the cluster + to become clean after each cluster change. If this doesn't + happen within the timeout, an exception will be raised. + + revive_timeout: (150) number of seconds to wait for an osd asok to + appear after attempting to revive the osd + + thrash_primary_affinity: (true) randomly adjust primary-affinity + + chance_pgnum_grow: (0) chance to increase a pool's size + chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool + pool_grow_by: (10) amount to increase pgnum by + chance_pgnum_shrink: (0) chance to decrease a pool's size + pool_shrink_by: (10) amount to decrease pgnum by + max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd + + pause_short: (3) duration of short pause + pause_long: (80) duration of long pause + pause_check_after: (50) assert osd down after this long + chance_inject_pause_short: (1) chance of injecting short stall + chance_inject_pause_long: (0) chance of injecting long stall + + clean_wait: (0) duration to wait before resuming thrashing once clean + + sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a + random live osd + + powercycle: (false) whether to power cycle the node instead + of just the osd process. Note that this assumes that a single + osd is the only important process on the node. + + bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash. + the delay lets the BlockDevice "accept" more aio operations but blocks + any flush, and then eventually crashes (losing some or all ios). If 0, + no bdev failure injection is enabled. + + bdev_inject_crash_probability: (.5) probability of doing a bdev failure + injection crash vs a normal OSD kill. 
+ + chance_test_backfill_full: (0) chance to simulate full disks stopping + backfill + + chance_test_map_discontinuity: (0) chance to test map discontinuity + map_discontinuity_sleep_time: (40) time to wait for map trims + + ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down + chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%) + + optrack_toggle_delay: (2.0) duration to delay between toggling op tracker + enablement to all osds + + dump_ops_enable: (true) continuously dump ops on all live osds + + noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub + + disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based + tests + + chance_thrash_cluster_full: .05 + + chance_thrash_pg_upmap: 1.0 + chance_thrash_pg_upmap_items: 1.0 + + aggressive_pg_num_changes: (true) whether we should bypass the careful throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller + + example: + + tasks: + - ceph: + - thrashosds: + cluster: ceph + chance_down: 10 + op_delay: 3 + min_in: 1 + timeout: 600 + - interactive: + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'thrashosds task only accepts a dict for configuration' + # add default value for sighup_delay + config['sighup_delay'] = config.get('sighup_delay', 0.1) + # add default value for optrack_toggle_delay + config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0) + # add default value for dump_ops_enable + config['dump_ops_enable'] = config.get('dump_ops_enable', "true") + # add default value for noscrub_toggle_delay + config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0) + # add default value for random_eio + config['random_eio'] = config.get('random_eio', 0.0) + aggro = config.get('aggressive_pg_num_changes', True) + + log.info("config is {config}".format(config=str(config))) + + overrides = ctx.config.get('overrides', {}) + log.info("overrides is {overrides}".format(overrides=str(overrides))) + teuthology.deep_merge(config, overrides.get('thrashosds', {})) + cluster = config.get('cluster', 'ceph') + + log.info("config is {config}".format(config=str(config))) + + if 'powercycle' in config: + + # sync everyone first to avoid collateral damage to / etc. 
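+        # Power cycling kills the node with no clean shutdown, so dirty pages
+        # belonging to the root filesystem (logs, package state, etc.) would
+        # simply be lost; flushing them with 'sync' first confines the damage
+        # to the OSDs we actually intend to hurt.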
+ log.info('Doing preliminary sync to avoid collateral damage...') + ctx.cluster.run(args=['sync']) + + if 'ipmi_user' in ctx.teuthology_config: + for remote in ctx.cluster.remotes.keys(): + log.debug('checking console status of %s' % remote.shortname) + if not remote.console.check_status(): + log.warning('Failed to get console status for %s', + remote.shortname) + + # check that all osd remotes have a valid console + osds = ctx.cluster.only(teuthology.is_type('osd', cluster)) + for remote in osds.remotes.keys(): + if not remote.console.has_ipmi_credentials: + raise Exception( + 'IPMI console required for powercycling, ' + 'but not available on osd role: {r}'.format( + r=remote.name)) + + cluster_manager = ctx.managers[cluster] + for f in ['powercycle', 'bdev_inject_crash']: + if config.get(f): + cluster_manager.config[f] = config.get(f) + + if aggro: + cluster_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr_debug_aggressive_pg_num_changes', + 'true') + + log.info('Beginning thrashosds...') + thrash_proc = ceph_manager.OSDThrasher( + cluster_manager, + config, + "OSDThrasher", + logger=log.getChild('thrasher') + ) + ctx.ceph[cluster].thrashers.append(thrash_proc) + try: + yield + finally: + log.info('joining thrashosds') + thrash_proc.do_join() + cluster_manager.wait_for_all_osds_up() + cluster_manager.flush_all_pg_stats() + cluster_manager.wait_for_recovery(config.get('timeout', 360)) + if aggro: + cluster_manager.raw_cluster_cmd( + 'config', 'rm', 'mgr', + 'mgr_debug_aggressive_pg_num_changes') diff --git a/qa/tasks/tox.py b/qa/tasks/tox.py new file mode 100644 index 000000000..61c5b7411 --- /dev/null +++ b/qa/tasks/tox.py @@ -0,0 +1,50 @@ +import argparse +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_toxvenv_dir(ctx): + return '{tdir}/tox-venv'.format(tdir=teuthology.get_testdir(ctx)) + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy tox from pip. It's a dependency for both Keystone and Tempest. + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task tox only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.info('Deploying tox from pip...') + for (client, _) in config.items(): + # yup, we have to deploy tox first. The packaged one, available + # on Sepia's Ubuntu machines, is outdated for Keystone/Tempest. 
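+        # The virtualenv is created under {testdir}/tox-venv and pins
+        # tox==3.15.0; later tasks (e.g. tempest.py) find it again through
+        # ctx.tox.venv_path, which is exported just below.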
+ tvdir = get_toxvenv_dir(ctx) + ctx.cluster.only(client).run(args=['python3', '-m', 'venv', tvdir]) + ctx.cluster.only(client).run(args=[ + 'source', '{tvdir}/bin/activate'.format(tvdir=tvdir), + run.Raw('&&'), + 'pip', 'install', 'tox==3.15.0' + ]) + + # export the path Keystone and Tempest + ctx.tox = argparse.Namespace() + ctx.tox.venv_path = get_toxvenv_dir(ctx) + + try: + yield + finally: + for (client, _) in config.items(): + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', get_toxvenv_dir(ctx) ]) diff --git a/qa/tasks/userdata_setup.yaml b/qa/tasks/userdata_setup.yaml new file mode 100644 index 000000000..afcc08e22 --- /dev/null +++ b/qa/tasks/userdata_setup.yaml @@ -0,0 +1,36 @@ +#cloud-config-archive + +- type: text/cloud-config + content: | + output: + all: '| tee -a /var/log/cloud-init-output.log' + +# allow passwordless access for debugging +- | + #!/usr/bin/env bash + exec passwd -d ubuntu + +- | + #!/usr/bin/env bash + + # mount a NFS share for storing logs + sed -i 's/archive.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list + sed -i 's/security.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list + apt-get update + + # DST Root CA X3 certificate expired on Sep 30, 2021. It was used by + # Let's Encrypt, which is what git.ceph.com relies on for HTTPS. Get the + # new Let's Encrypt root certificate in place and deactivate the old one + # (lines that begin with "!" are deselected). + apt-get install --only-upgrade ca-certificates libssl1.0.0 + sed -i 's/mozilla\/DST_Root_CA_X3\.crt/!mozilla\/DST_Root_CA_X3\.crt/' /etc/ca-certificates.conf + update-ca-certificates + + apt-get -y install nfs-common + mkdir /mnt/log + # 10.0.2.2 is the host + mount -v -t nfs -o proto=tcp 10.0.2.2:{mnt_dir} /mnt/log + + # mount the iso image that has the test script + mkdir /mnt/cdrom + mount -t auto /dev/cdrom /mnt/cdrom diff --git a/qa/tasks/userdata_teardown.yaml b/qa/tasks/userdata_teardown.yaml new file mode 100644 index 000000000..731d769f0 --- /dev/null +++ b/qa/tasks/userdata_teardown.yaml @@ -0,0 +1,11 @@ +- | + #!/usr/bin/env bash + cp /var/log/cloud-init-output.log /mnt/log + +- | + #!/usr/bin/env bash + umount /mnt/log + +- | + #!/usr/bin/env bash + shutdown -h -P now diff --git a/qa/tasks/util/__init__.py b/qa/tasks/util/__init__.py new file mode 100644 index 000000000..5b8575ed9 --- /dev/null +++ b/qa/tasks/util/__init__.py @@ -0,0 +1,26 @@ +from teuthology import misc + +def get_remote(ctx, cluster, service_type, service_id): + """ + Get the Remote for the host where a particular role runs. + + :param cluster: name of the cluster the service is part of + :param service_type: e.g. 'mds', 'osd', 'client' + :param service_id: The third part of a role, e.g. 
'0' for + the role 'ceph.client.0' + :return: a Remote instance for the host where the + requested role is placed + """ + def _is_instance(role): + role_tuple = misc.split_role(role) + return role_tuple == (cluster, service_type, str(service_id)) + try: + (remote,) = ctx.cluster.only(_is_instance).remotes.keys() + except ValueError: + raise KeyError("Service {0}.{1}.{2} not found".format(cluster, + service_type, + service_id)) + return remote + +def get_remote_for_role(ctx, role): + return get_remote(ctx, *misc.split_role(role)) diff --git a/qa/tasks/util/chacra.py b/qa/tasks/util/chacra.py new file mode 100644 index 000000000..ed9358a59 --- /dev/null +++ b/qa/tasks/util/chacra.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import requests +import sys + +from pathlib import Path +from urllib.parse import urlparse + +log = logging.getLogger(__name__) + +SHAMAN_SEARCH_URL = 'https://shaman.ceph.com/api/search' + +PROJECT = 'ceph' +DISTRO = 'ubuntu' +RELEASE = 'focal' +ARCH='x86_64' +BRANCH = 'main' +SHA1 = 'latest' +FLAVOR = 'default' +FILENAME = 'cephadm' + + +def search(*args, **kwargs): + ''' + Query shaman for a build result + ''' + resp = requests.get(SHAMAN_SEARCH_URL, params=kwargs) + resp.raise_for_status() + return resp + +def _get_distros(distro, release, arch=None): + ret = f'{distro}/{release}' + if arch: + ret = f'{ret}/{arch}' + return ret + +def _get_binary_url(host, project, ref, sha1, distro, release, arch, flavor, filename): + return f'https://{host}/binaries/{project}/{ref}/{sha1}/{distro}/{release}/{arch}/flavors/{flavor}/{filename}' + +def get_binary_url( + filename, + project=None, + distro=None, + release=None, + arch=None, + flavor=None, + branch=None, + sha1=None +): + ''' + Return the chacra url for a build result + ''' + # query shaman for the built binary + s = {} + if project: + s['project'] = project + if distro: + s['distros'] = _get_distros(distro, release, arch) + if flavor: + s['flavor'] = flavor + if branch: + s['ref'] = branch + if sha1: + s['sha1'] = sha1 + + resp = search(**s) + result = resp.json() + + if len(result) == 0: + raise RuntimeError(f'no results found at {resp.url}') + + # TODO: filter the result down to the correct arch etc.? 
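+    # Each shaman record used below carries 'status', 'url', 'ref', 'sha1',
+    # 'distro', 'flavor', 'project' and an 'archs' list (plus, when present,
+    # 'distro_codename'/'distro_version'); for now we simply take the first
+    # match rather than filtering further.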
+ result = result[0] + + status = result['status'] + if status != 'ready': + raise RuntimeError(f'cannot pull file with status: {status}') + + # build the chacra url + chacra_host = urlparse(result['url']).netloc + chacra_ref = result['ref'] + chacra_sha1 = result['sha1'] + log.info(f'got chacra host {chacra_host}, ref {chacra_ref}, sha1 {chacra_sha1} from {resp.url}') + + # prefer codename if a release is not specified + if result.get('distro_codename'): + release = result.get('distro_codename') + elif result.get('distro_version'): + release = result.get('distro_version') + elif not release: + raise RuntimeError('cannot determine distro release!') + + if not arch: + if ARCH in result['archs']: + arch = ARCH + elif len(result['archs']) > 0: + arch = result['archs'][0] + else: + raise RuntimeError('cannot determine the arch type!') + + # build the url to the binary + url = _get_binary_url( + chacra_host, + result['project'], + chacra_ref, + chacra_sha1, + result['distro'], + release, + arch, + result['flavor'], + filename, + ) + + return url + +def pull( + filename, + project=None, + distro=None, + release=None, + arch=None, + flavor=None, + branch=None, + sha1=None +): + ''' + Pull a build result from chacra + ''' + url = get_binary_url( + filename, + project=project, + distro=distro, + release=release, + arch=arch, + flavor=flavor, + branch=branch, + sha1=sha1 + ) + resp = requests.get(url, stream=True) + resp.raise_for_status() + log.info(f'got file from {resp.url}') + + return resp + +def main(): + handler = logging.StreamHandler(sys.stdout) + log.addHandler(handler) + log.setLevel(logging.INFO) + + parser = argparse.ArgumentParser() + parser.add_argument('--project', default=PROJECT) + parser.add_argument('--distro', default=DISTRO) + parser.add_argument('--release', default=RELEASE) + parser.add_argument('--arch', default=ARCH) + parser.add_argument('--branch', default=BRANCH) + parser.add_argument('--sha1', default=SHA1) + parser.add_argument('--flavor', default=FLAVOR) + parser.add_argument('--src', default=FILENAME) + parser.add_argument('--dest', default=FILENAME) + args = parser.parse_args() + + resp = pull( + args.src, + project=args.project, + distro=args.distro, + release=args.release, + arch=args.arch, + flavor=args.flavor, + branch=args.branch, + sha1=args.sha1 + ) + + dest = Path(args.dest).absolute() + with open(dest, 'wb') as f: + for chunk in resp.iter_content(chunk_size=None, decode_unicode=True): + log.info('.',) + f.write(chunk) + log.info(f'wrote binary file: {dest}') + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/qa/tasks/util/rados.py b/qa/tasks/util/rados.py new file mode 100644 index 000000000..a0c54ce4e --- /dev/null +++ b/qa/tasks/util/rados.py @@ -0,0 +1,87 @@ +import logging + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def rados(ctx, remote, cmd, wait=True, check_status=False): + testdir = teuthology.get_testdir(ctx) + log.info("rados %s" % ' '.join(cmd)) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + check_status=check_status, + wait=wait, + ) + if wait: + return proc.exitstatus + else: + return proc + +def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph", application=None): + remote.run(args=['sudo', 'ceph'] + + cmd_erasure_code_profile(profile_name, profile) + ['--cluster', cluster_name]) + remote.run(args=[ + 'sudo', 'ceph', 'osd', 
'pool', 'create', name, + str(pgnum), str(pgnum), 'erasure', profile_name, '--cluster', cluster_name + ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ], check_status=False) # may fail as EINVAL when run in jewel upgrade test + +def create_replicated_pool(remote, name, pgnum, cluster_name="ceph", application=None): + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), '--cluster', cluster_name + ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ], check_status=False) + +def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph"): + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum), '--cluster', cluster_name + ]) + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name, + str(size), '--cluster', cluster_name + ]) + +def cmd_erasure_code_profile(profile_name, profile): + """ + Return the shell command to run to create the erasure code profile + described by the profile parameter. + + :param profile_name: a string matching [A-Za-z0-9-_.]+ + :param profile: a map whose semantic depends on the erasure code plugin + :returns: a shell command as an array suitable for Remote.run + + If profile is {}, it is replaced with + + { 'k': '2', 'm': '1', 'crush-failure-domain': 'osd'} + + for backward compatibility. In previous versions of teuthology, + these values were hardcoded as function arguments and some yaml + files were designed with these implicit values. The teuthology + code should not know anything about the erasure code profile + content or semantic. The valid values and parameters are outside + its scope. + """ + + if profile == {}: + profile = { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + } + return [ + 'osd', 'erasure-code-profile', 'set', + profile_name + ] + [ str(key) + '=' + str(value) for key, value in profile.items() ] diff --git a/qa/tasks/util/rgw.py b/qa/tasks/util/rgw.py new file mode 100644 index 000000000..59c801028 --- /dev/null +++ b/qa/tasks/util/rgw.py @@ -0,0 +1,99 @@ +import logging +import json +import time + +from io import StringIO + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False, + omit_sudo=False, omit_tdir=False, format='json', decode=True, + log_level=logging.DEBUG): + log.info('rgwadmin: {client} : {cmd}'.format(client=client,cmd=cmd)) + testdir = teuthology.get_testdir(ctx) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
+ client_id + pre = [ + 'adjust-ulimits', + 'ceph-coverage'] + if not omit_tdir: + pre.append( + '{tdir}/archive/coverage'.format(tdir=testdir)) + pre.extend([ + 'radosgw-admin', + '--log-to-stderr', + '--format', format, + '-n', client_with_id, + '--cluster', cluster_name, + ]) + pre.extend(cmd) + log.log(log_level, 'rgwadmin: cmd=%s' % pre) + (remote,) = ctx.cluster.only(client).remotes.keys() + proc = remote.run( + args=pre, + check_status=check_status, + omit_sudo=omit_sudo, + stdout=StringIO(), + stderr=StringIO(), + stdin=stdin, + ) + r = proc.exitstatus + out = proc.stdout.getvalue() + if not decode: + return (r, out) + j = None + if not r and out != '': + try: + j = json.loads(out) + log.log(log_level, ' json result: %s' % j) + except ValueError: + j = out + log.log(log_level, ' raw result: %s' % j) + return (r, j) + +def get_user_summary(out, user): + """Extract the summary for a given user""" + user_summary = None + for summary in out['summary']: + if summary.get('user') == user: + user_summary = summary + + if not user_summary: + raise AssertionError('No summary info found for user: %s' % user) + + return user_summary + +def get_user_successful_ops(out, user): + summary = out['summary'] + if len(summary) == 0: + return 0 + return get_user_summary(out, user)['total']['successful_ops'] + +def wait_for_radosgw(url, remote): + """ poll the given url until it starts accepting connections + + add_daemon() doesn't wait until radosgw finishes startup, so this is used + to avoid racing with later tasks that expect radosgw to be up and listening + """ + # TODO: use '--retry-connrefused --retry 8' when teuthology is running on + # Centos 8 and other OS's with an updated version of curl + curl_cmd = ['curl', + url] + exit_status = 0 + num_retries = 8 + for seconds in range(num_retries): + proc = remote.run( + args=curl_cmd, + check_status=False, + stdout=StringIO(), + stderr=StringIO(), + stdin=StringIO(), + ) + exit_status = proc.exitstatus + if exit_status == 0: + break + time.sleep(2**seconds) + + assert exit_status == 0 diff --git a/qa/tasks/util/test/__init__.py b/qa/tasks/util/test/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/util/test/__init__.py diff --git a/qa/tasks/util/test/test_rados.py b/qa/tasks/util/test/test_rados.py new file mode 100644 index 000000000..a8f4cb02d --- /dev/null +++ b/qa/tasks/util/test/test_rados.py @@ -0,0 +1,40 @@ +# +# The MIT License +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +from tasks.util import rados + +class TestRados(object): + + def test_cmd_erasure_code_profile(self): + name = 'NAME' + cmd = rados.cmd_erasure_code_profile(name, {}) + assert 'k=2' in cmd + assert name in cmd + cmd = rados.cmd_erasure_code_profile(name, { 'k': '88' }) + assert 'k=88' in cmd + assert name in cmd diff --git a/qa/tasks/util/workunit.py b/qa/tasks/util/workunit.py new file mode 100644 index 000000000..1f5623af8 --- /dev/null +++ b/qa/tasks/util/workunit.py @@ -0,0 +1,78 @@ +import copy + +from teuthology import misc +from teuthology.orchestra import run + +class Refspec: + def __init__(self, refspec): + self.refspec = refspec + + def __str__(self): + return self.refspec + + def _clone(self, git_url, clonedir, opts=None): + if opts is None: + opts = [] + return (['rm', '-rf', clonedir] + + [run.Raw('&&')] + + ['git', 'clone'] + opts + + [git_url, clonedir]) + + def _cd(self, clonedir): + return ['cd', clonedir] + + def _checkout(self): + return ['git', 'checkout', self.refspec] + + def clone(self, git_url, clonedir): + return (self._clone(git_url, clonedir) + + [run.Raw('&&')] + + self._cd(clonedir) + + [run.Raw('&&')] + + self._checkout()) + + +class Branch(Refspec): + def __init__(self, tag): + Refspec.__init__(self, tag) + + def clone(self, git_url, clonedir): + opts = ['--depth', '1', + '--branch', self.refspec] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + +class Head(Refspec): + def __init__(self): + Refspec.__init__(self, 'HEAD') + + def clone(self, git_url, clonedir): + opts = ['--depth', '1'] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + +def get_refspec_after_overrides(config, overrides): + # mimic the behavior of the "install" task, where the "overrides" are + # actually the defaults of that task. in other words, if none of "sha1", + # "tag", or "branch" is specified by a "workunit" tasks, we will update + # it with the information in the "workunit" sub-task nested in "overrides". 
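+    # Illustrative (hypothetical) values: config == {'branch': 'wip-foo'}
+    # causes any refspec keys in the overrides to be dropped and yields
+    # Branch('wip-foo'); if neither config nor overrides names a refspec,
+    # the function falls back to Head().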
+ overrides = copy.deepcopy(overrides.get('workunit', {})) + refspecs = {'suite_sha1': Refspec, 'suite_branch': Branch, + 'sha1': Refspec, 'tag': Refspec, 'branch': Branch} + if any(map(lambda i: i in config, refspecs.keys())): + for i in refspecs.keys(): + overrides.pop(i, None) + misc.deep_merge(config, overrides) + + for spec, cls in refspecs.items(): + refspec = config.get(spec) + if refspec: + refspec = cls(refspec) + break + if refspec is None: + refspec = Head() + return refspec diff --git a/qa/tasks/vault.py b/qa/tasks/vault.py new file mode 100644 index 000000000..2ff008c4d --- /dev/null +++ b/qa/tasks/vault.py @@ -0,0 +1,288 @@ +""" +Deploy and configure Vault for Teuthology +""" + +import argparse +import contextlib +import logging +import time +import json +from os import path +from http import client as http_client +from urllib.parse import urljoin + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError, CommandFailedError + + +log = logging.getLogger(__name__) + + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download Vault Release from Hashicopr website. + Remove downloaded file upon exit. + """ + assert isinstance(config, dict) + log.info('Downloading Vault...') + testdir = teuthology.get_testdir(ctx) + + for (client, cconf) in config.items(): + install_url = cconf.get('install_url') + install_sha256 = cconf.get('install_sha256') + if not install_url or not install_sha256: + raise ConfigError("Missing Vault install_url and/or install_sha256") + install_zip = path.join(testdir, 'vault.zip') + install_dir = path.join(testdir, 'vault') + + log.info('Downloading Vault...') + ctx.cluster.only(client).run( + args=['curl', '-L', install_url, '-o', install_zip]) + + log.info('Verifying SHA256 signature...') + ctx.cluster.only(client).run( + args=['echo', ' '.join([install_sha256, install_zip]), run.Raw('|'), + 'sha256sum', '--check', '--status']) + + log.info('Extracting vault...') + ctx.cluster.only(client).run(args=['mkdir', '-p', install_dir]) + # Using python in case unzip is not installed on hosts + # Using python3 in case python is not installed on hosts + failed=True + for f in [ + lambda z,d: ['unzip', z, '-d', d], + lambda z,d: ['python3', '-m', 'zipfile', '-e', z, d], + lambda z,d: ['python', '-m', 'zipfile', '-e', z, d]]: + try: + ctx.cluster.only(client).run(args=f(install_zip, install_dir)) + failed = False + break + except CommandFailedError as e: + failed = e + if failed: + raise failed + + try: + yield + finally: + log.info('Removing Vault...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=['rm', '-rf', install_dir, install_zip]) + + +def get_vault_dir(ctx): + return '{tdir}/vault'.format(tdir=teuthology.get_testdir(ctx)) + + +@contextlib.contextmanager +def run_vault(ctx, config): + assert isinstance(config, dict) + + for (client, cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + _, port = ctx.vault.endpoints[client] + listen_addr = 
"0.0.0.0:{}".format(port) + + root_token = ctx.vault.root_token = cconf.get('root_token', 'root') + + log.info("Starting Vault listening on %s ...", listen_addr) + v_params = [ + '-dev', + '-dev-listen-address={}'.format(listen_addr), + '-dev-no-store-token', + '-dev-root-token-id={}'.format(root_token) + ] + + cmd = "chmod +x {vdir}/vault && {vdir}/vault server {vargs}".format(vdir=get_vault_dir(ctx), vargs=" ".join(v_params)) + + ctx.daemons.add_daemon( + remote, 'vault', client_id, + cluster=cluster_name, + args=['bash', '-c', cmd, run.Raw('& { read; kill %1; }')], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=get_vault_dir(ctx), + wait=False, + check_status=False, + ) + time.sleep(10) + try: + yield + finally: + log.info('Stopping Vault instance') + ctx.daemons.get_daemon('vault', client_id, cluster_name).stop() + + +@contextlib.contextmanager +def setup_vault(ctx, config): + """ + Mount Transit or KV version 2 secrets engine + """ + (cclient, cconfig) = next(iter(config.items())) + engine = cconfig.get('engine') + + if engine == 'kv': + log.info('Mounting kv version 2 secrets engine') + mount_path = '/v1/sys/mounts/kv' + data = { + "type": "kv", + "options": { + "version": "2" + } + } + elif engine == 'transit': + log.info('Mounting transit secrets engine') + mount_path = '/v1/sys/mounts/transit' + data = { + "type": "transit" + } + else: + raise Exception("Unknown or missing secrets engine") + + send_req(ctx, cconfig, cclient, mount_path, json.dumps(data)) + yield + + +def send_req(ctx, cconfig, client, path, body, method='POST'): + host, port = ctx.vault.endpoints[client] + req = http_client.HTTPConnection(host, port, timeout=30) + token = cconfig.get('root_token', 'atoken') + log.info("Send request to Vault: %s:%s at %s with token: %s", host, port, path, token) + headers = {'X-Vault-Token': token} + req.request(method, path, headers=headers, body=body) + resp = req.getresponse() + log.info(resp.read()) + if not (resp.status >= 200 and resp.status < 300): + raise Exception("Request to Vault server failed with status %d" % resp.status) + return resp + + +@contextlib.contextmanager +def create_secrets(ctx, config): + (cclient, cconfig) = next(iter(config.items())) + engine = cconfig.get('engine') + prefix = cconfig.get('prefix') + secrets = cconfig.get('secrets') + flavor = cconfig.get('flavor') + if secrets is None: + raise ConfigError("No secrets specified, please specify some.") + + ctx.vault.keys[cclient] = [] + for secret in secrets: + try: + path = secret['path'] + except KeyError: + raise ConfigError('Missing "path" field in secret') + exportable = secret.get("exportable", flavor == "old") + + if engine == 'kv': + try: + data = { + "data": { + "key": secret['secret'] + } + } + except KeyError: + raise ConfigError('Missing "secret" field in secret') + elif engine == 'transit': + data = {"exportable": "true" if exportable else "false"} + else: + raise Exception("Unknown or missing secrets engine") + + send_req(ctx, cconfig, cclient, urljoin(prefix, path), json.dumps(data)) + + ctx.vault.keys[cclient].append({ 'Path': path }); + + log.info("secrets created") + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure Vault + + Example of configuration: + + tasks: + - vault: + client.0: + install_url: http://my.special.place/vault.zip + install_sha256: zipfiles-sha256-sum-much-larger-than-this + root_token: test_root_token + engine: transit + flavor: old + prefix: /v1/transit/keys + secrets: + - path: kv/teuthology/key_a + secret: 
base64_only_if_using_kv_aWxkCmNlcGguY29uZgo= + exportable: true + - path: kv/teuthology/key_b + secret: base64_only_if_using_kv_dApzcmMKVGVzdGluZwo= + + engine can be 'kv' or 'transit' + prefix should be /v1/kv/data/ for kv, /v1/transit/keys/ for transit + flavor should be 'old' only if testing the original transit logic + otherwise omit. + for kv only: 256-bit key value should be specified via secret, + otherwise should omit. + for transit: exportable may be used to make individual keys exportable. + flavor may be set to 'old' to make all keys exportable by default, + which is required by the original transit logic. + """ + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('vault', {})) + + log.debug('Vault config is %s', config) + + ctx.vault = argparse.Namespace() + ctx.vault.endpoints = assign_ports(ctx, config, 8200) + ctx.vault.root_token = None + ctx.vault.prefix = config[client].get('prefix') + ctx.vault.engine = config[client].get('engine') + ctx.vault.keys = {} + q=config[client].get('flavor') + if q: + ctx.vault.flavor = q + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: run_vault(ctx=ctx, config=config), + lambda: setup_vault(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config) + ): + yield + diff --git a/qa/tasks/vip.py b/qa/tasks/vip.py new file mode 100644 index 000000000..52114b104 --- /dev/null +++ b/qa/tasks/vip.py @@ -0,0 +1,205 @@ +import contextlib +import ipaddress +import logging +import re + +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + + +def subst_vip(ctx, cmd): + p = re.compile(r'({{VIP(\d+)}})') + for m in p.findall(cmd): + n = int(m[1]) + if n >= len(ctx.vip["vips"]): + log.warning(f'no VIP{n} (we have {len(ctx.vip["vips"])})') + else: + cmd = cmd.replace(m[0], str(ctx.vip["vips"][n])) + + if '{{VIPPREFIXLEN}}' in cmd: + cmd = cmd.replace('{{VIPPREFIXLEN}}', str(ctx.vip["vnet"].prefixlen)) + + if '{{VIPSUBNET}}' in cmd: + cmd = cmd.replace('{{VIPSUBNET}}', str(ctx.vip["vnet"].network_address)) + + return cmd + + +def echo(ctx, config): + """ + This is mostly for debugging + """ + for remote in ctx.cluster.remotes.keys(): + log.info(subst_vip(ctx, config)) + + +def exec(ctx, config): + """ + This is similar to the standard 'exec' task, but does the VIP substitutions. 
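+
+    An illustrative (hypothetical) fragment:
+
+        tasks:
+        - vip.exec:
+            host.a:
+            - echo {{VIP0}}/{{VIPPREFIXLEN}}
+
+    Each {{VIP<n>}}, {{VIPSUBNET}} and {{VIPPREFIXLEN}} token is expanded by
+    subst_vip() before the command is run.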
+ """ + assert isinstance(config, dict), "task exec got invalid config" + + testdir = teuthology.get_testdir(ctx) + + if 'all-roles' in config and len(config) == 1: + a = config['all-roles'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) + elif 'all-hosts' in config and len(config) == 1: + a = config['all-hosts'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if id_.startswith('host.')) + + for role, ls in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + for c in ls: + c.replace('$TESTDIR', testdir) + remote.run( + args=[ + 'sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'bash', + '-ex', + '-c', + subst_vip(ctx, c)], + ) + + +def map_vips(mip, count): + for mapping in teuth_config.get('vip', []): + mnet = ipaddress.ip_network(mapping['machine_subnet']) + vnet = ipaddress.ip_network(mapping['virtual_subnet']) + if vnet.prefixlen >= mnet.prefixlen: + log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix") + return None + if mip in mnet: + pos = list(mnet.hosts()).index(mip) + log.info(f"{mip} in {mnet}, pos {pos}") + r = [] + for sub in vnet.subnets(new_prefix=mnet.prefixlen): + r += [list(sub.hosts())[pos]] + count -= 1 + if count == 0: + break + return vnet, r + return None + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up a virtual network and allocate virtual IP(s) for each machine. + + The strategy here is to set up a private virtual subnet that is larger than + the subnet the machine(s) exist in, and allocate virtual IPs from that pool. + + - The teuthology.yaml must include a section like:: + + vip: + - machine_subnet: 172.21.0.0/20 + virtual_subnet: 10.0.0.0/16 + + At least one item's machine_subnet should map the subnet the test machine's + primary IP lives in (the one DNS resolves to). The virtual_subnet must have a + shorter prefix (i.e., larger than the machine_subnet). If there are multiple + machine_subnets, they cannot map into the same virtual_subnet. + + - Each machine gets an IP in the virtual_subset statically configured by the vip + task. This lets all test machines reach each other and (most importantly) any + virtual IPs. + + - 1 or more virtual IPs are then mapped for the task. These IPs are chosen based + on one of the remotes. This uses a lot of network space but it avoids any + conflicts between tests. + + To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used. + + {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example). + + {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example. + + These substitutions work for vip.echo, and (at the time of writing) cephadm.apply + and cephadm.shell. + """ + if config is None: + config = {} + count = config.get('count', 1) + + ctx.vip_static = {} + ctx.vip = {} + + log.info("Allocating static IPs for each host...") + for remote in ctx.cluster.remotes.keys(): + ip = remote.ssh.get_transport().getpeername()[0] + log.info(f'peername {ip}') + mip = ipaddress.ip_address(ip) + vnet, vips = map_vips(mip, count + 1) + static = vips.pop(0) + log.info(f"{remote.hostname} static {static}, vnet {vnet}") + + if not ctx.vip: + # do this only once (use the first remote we see), since we only need 1 + # set of virtual IPs, regardless of how many remotes we have. 
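+            # ctx.vip records the shared virtual subnet plus the pool of spare
+            # VIPs; subst_vip() above reads ctx.vip['vips'] and ctx.vip['vnet']
+            # when expanding the {{VIP*}} tokens.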
+ log.info("VIPs are {map(str, vips)}") + ctx.vip = { + 'vnet': vnet, + 'vips': vips, + } + else: + # all remotes must be in the same virtual network... + assert vnet == ctx.vip['vnet'] + + # pick interface + p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)') + iface = None + for line in remote.sh(['sudo', 'ip','route','ls']).splitlines(): + m = p.findall(line) + if not m: + continue + route_iface = m[0][1] + route_ip = m[0][4] + if route_ip == ip: + iface = route_iface + break + + if not iface: + log.error(f"Unable to find {remote.hostname} interface for {ip}") + continue + + # configure + log.info(f"Configuring {static} on {remote.hostname} iface {iface}...") + remote.sh(['sudo', + 'ip', 'addr', 'add', + str(static) + '/' + str(vnet.prefixlen), + 'dev', iface]) + + ctx.vip_static[remote] = { + "iface": iface, + "static": static, + } + + try: + yield + + finally: + for remote, m in ctx.vip_static.items(): + log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...") + remote.sh(['sudo', + 'ip', 'addr', 'del', + str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen), + 'dev', m['iface']]) + + for vip in ctx.vip['vips']: + remote.sh( + [ + 'sudo', + 'ip', 'addr', 'del', + str(vip) + '/' + str(ctx.vip['vnet'].prefixlen), + 'dev', m['iface'] + ], + check_status=False, + ) + diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py new file mode 100644 index 000000000..df4886fb6 --- /dev/null +++ b/qa/tasks/vstart_runner.py @@ -0,0 +1,1516 @@ +""" +vstart_runner: override Filesystem and Mount interfaces to run a CephFSTestCase against a vstart +ceph instance instead of a packaged/installed cluster. Use this to turn around test cases +quickly during development. + +Simple usage (assuming teuthology and ceph checked out in ~/git): + + # Activate the teuthology virtualenv + source ~/git/teuthology/virtualenv/bin/activate + # Go into your ceph build directory + cd ~/git/ceph/build + # Invoke a test using this script + python ~/git/ceph/qa/tasks/vstart_runner.py --create tasks.cephfs.test_data_scan + +Alternative usage: + + # Alternatively, if you use different paths, specify them as follows: + LD_LIBRARY_PATH=`pwd`/lib PYTHONPATH=~/git/teuthology:~/git/ceph/qa:`pwd`/../src/pybind:`pwd`/lib/cython_modules/lib.3 python ~/git/ceph/qa/tasks/vstart_runner.py + + # If you wish to drop to a python shell on failures, use --interactive: + python ~/git/ceph/qa/tasks/vstart_runner.py --interactive + + # If you wish to run a named test case, pass it as an argument: + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.cephfs.test_data_scan + + # Also, you can create the cluster once and then run named test cases against it: + python ~/git/ceph/qa/tasks/vstart_runner.py --create-cluster-only + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_health + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_rgw + +Following are few important notes that might save some investigation around +vstart_runner.py - + +* If using the FUSE client, ensure that the fuse package is installed and + enabled on the system and that "user_allow_other" is added to /etc/fuse.conf. + +* If using the kernel client, the user must have the ability to run commands + with passwordless sudo access. + +* A failure on the kernel client may crash the host, so it's recommended to + use this functionality within a virtual machine. + +* "adjust-ulimits", "ceph-coverage" and "sudo" in command arguments are + overridden by vstart_runner.py. 
Former two usually have no applicability + for test runs on developer's machines and see note point on "omit_sudo" + to know more about overriding of "sudo". + +* "omit_sudo" is re-set to False unconditionally in cases of commands + "passwd" and "chown". + +* The presence of binary file named after the first argument in the command + arguments received by the method LocalRemote.run() is checked for in + <ceph-repo-root>/build/bin/. If present, the first argument is replaced with + the path to binary file. +""" + +from io import StringIO +from json import loads +from collections import defaultdict +import getpass +import signal +import tempfile +import threading +import datetime +import shutil +import re +import os +import time +import sys +import errno +from IPy import IP +import unittest +import platform +import logging +from argparse import Namespace + +from unittest import suite, loader + +from teuthology.orchestra.run import quote, PIPE +from teuthology.orchestra.daemon import DaemonGroup +from teuthology.orchestra.remote import RemoteShell +from teuthology.config import config as teuth_config +from teuthology.contextutil import safe_while +from teuthology.contextutil import MaxWhileTries +from teuthology.exceptions import CommandFailedError +try: + import urllib3 + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +except: + pass + +def init_log(log_level=logging.INFO): + global log + if log is not None: + del log + log = logging.getLogger(__name__) + + global logpath + logpath = './vstart_runner.log' + + handler = logging.FileHandler(logpath) + formatter = logging.Formatter( + fmt=u'%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s', + datefmt='%Y-%m-%dT%H:%M:%S') + handler.setFormatter(formatter) + log.addHandler(handler) + log.setLevel(log_level) + +log = None +init_log() + + +def respawn_in_path(lib_path, python_paths): + execv_cmd = ['python'] + if platform.system() == "Darwin": + lib_path_var = "DYLD_LIBRARY_PATH" + else: + lib_path_var = "LD_LIBRARY_PATH" + + py_binary = os.environ.get("PYTHON", sys.executable) + + if lib_path_var in os.environ: + if lib_path not in os.environ[lib_path_var]: + os.environ[lib_path_var] += ':' + lib_path + os.execvp(py_binary, execv_cmd + sys.argv) + else: + os.environ[lib_path_var] = lib_path + os.execvp(py_binary, execv_cmd + sys.argv) + + for p in python_paths: + sys.path.insert(0, p) + + +def launch_subprocess(args, cwd=None, env=None, shell=True, + executable='/bin/bash'): + return subprocess.Popen(args, cwd=cwd, env=env, shell=shell, + executable=executable, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, stdin=subprocess.PIPE) + + +# Let's use some sensible defaults +if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"): + + # A list of candidate paths for each package we need + guesses = [ + ["~/git/teuthology", "~/scm/teuthology", "~/teuthology"], + ["lib/cython_modules/lib.3"], + ["../src/pybind"], + ] + + python_paths = [] + + # Up one level so that "tasks.foo.bar" imports work + python_paths.append(os.path.abspath( + os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") + )) + + for package_guesses in guesses: + for g in package_guesses: + g_exp = os.path.abspath(os.path.expanduser(g)) + if os.path.exists(g_exp): + python_paths.append(g_exp) + + ld_path = os.path.join(os.getcwd(), "lib/") + print("Using guessed paths {0} {1}".format(ld_path, python_paths)) + respawn_in_path(ld_path, python_paths) + + +try: + from tasks.ceph_manager import CephManager + from tasks.cephfs.fuse_mount 
import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster + from tasks.cephfs.mount import CephFSMount + from tasks.mgr.mgr_test_case import MgrCluster + from teuthology.task import interactive +except ImportError: + sys.stderr.write("***\nError importing packages, have you activated your teuthology virtualenv " + "and set PYTHONPATH to point to teuthology and ceph-qa-suite?\n***\n\n") + raise + +# Must import after teuthology because of gevent monkey patching +import subprocess + +if os.path.exists("./CMakeCache.txt"): + # Running in build dir of a cmake build + BIN_PREFIX = "./bin/" + SRC_PREFIX = "../src" +else: + # Running in src/ of an autotools build + BIN_PREFIX = "./" + SRC_PREFIX = "./" + +CEPH_CMD = os.path.join(BIN_PREFIX, 'ceph') +RADOS_CMD = os.path.join(BIN_PREFIX, 'rados') + + +def rm_nonascii_chars(var): + var = var.replace(b'\xe2\x80\x98', b'\'') + var = var.replace(b'\xe2\x80\x99', b'\'') + return var + +class LocalRemoteProcess(object): + def __init__(self, args, subproc, check_status, stdout, stderr, usr_args): + self.args = args + self.subproc = subproc + self.stdout = stdout + self.stderr = stderr + self.usr_args = usr_args + # this variable is meant for instance of this class named fuse_daemon. + # child process of the command launched with sudo must be killed, + # since killing parent process alone has no impact on the child + # process. + self.fuse_pid = -1 + + self.check_status = check_status + self.exitstatus = self.returncode = None + + def _write_stdout(self, out): + if isinstance(self.stdout, StringIO): + self.stdout.write(out.decode(errors='ignore')) + elif self.stdout is None: + pass + else: + self.stdout.write(out) + + def _write_stderr(self, err): + if isinstance(self.stderr, StringIO): + self.stderr.write(err.decode(errors='ignore')) + elif self.stderr is None: + pass + else: + self.stderr.write(err) + + def wait(self): + if self.finished: + # Avoid calling communicate() on a dead process because it'll + # give you stick about std* already being closed + if self.check_status and self.exitstatus != 0: + # TODO: print self.args or self.usr_args in exception msg? + raise CommandFailedError(self.args, self.exitstatus) + else: + return + + out, err = self.subproc.communicate() + out, err = rm_nonascii_chars(out), rm_nonascii_chars(err) + self._write_stdout(out) + self._write_stderr(err) + + self.exitstatus = self.returncode = self.subproc.returncode + + if self.exitstatus != 0: + sys.stderr.write(out.decode()) + sys.stderr.write(err.decode()) + + if self.check_status and self.exitstatus != 0: + # TODO: print self.args or self.usr_args in exception msg? 
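+            # (self.args is the command actually executed, i.e. after the
+            # sudo/helper-tool rewriting done by LocalRemote, while
+            # self.usr_args is the command as originally passed by the caller.)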
+ raise CommandFailedError(self.args, self.exitstatus) + + @property + def finished(self): + if self.exitstatus is not None: + return True + + if self.subproc.poll() is not None: + out, err = self.subproc.communicate() + self._write_stdout(out) + self._write_stderr(err) + + self.exitstatus = self.returncode = self.subproc.returncode + + return True + else: + return False + + def kill(self): + log.debug("kill ") + if self.subproc.pid and not self.finished: + log.debug(f"kill: killing pid {self.subproc.pid} " + f"({self.usr_args})") + if self.fuse_pid != -1: + safe_kill(self.fuse_pid) + else: + safe_kill(self.subproc.pid) + else: + log.debug(f"kill: already terminated ({self.usr_args})") + + @property + def stdin(self): + class FakeStdIn(object): + def __init__(self, mount_daemon): + self.mount_daemon = mount_daemon + + def close(self): + self.mount_daemon.kill() + + return FakeStdIn(self) + + +class LocalRemote(RemoteShell): + """ + Amusingly named class to present the teuthology RemoteProcess interface when we are really + running things locally for vstart + + Run this inside your src/ dir! + """ + + def __init__(self): + super().__init__() + self.name = "local" + self._hostname = "localhost" + self.user = getpass.getuser() + + @property + def hostname(self): + if not hasattr(self, '_hostname'): + self._hostname = 'localhost' + return self._hostname + + def get_file(self, path, sudo, dest_dir): + tmpfile = tempfile.NamedTemporaryFile(delete=False).name + shutil.copy(path, tmpfile) + return tmpfile + + # XXX: This method ignores the error raised when src and dst are + # holding same path. For teuthology, same path still represents + # different locations as they lie on different machines. + def put_file(self, src, dst, sudo=False): + try: + shutil.copy(src, dst) + except shutil.SameFileError: + pass + + + def _omit_cmd_args(self, args, omit_sudo): + """ + Helper tools are omitted since those are not meant for tests executed + using vstart_runner.py. And sudo's omission depends on the value of + the variable omit_sudo. + """ + helper_tools = ('adjust-ulimits', 'ceph-coverage', + 'None/archive/coverage') + for i in helper_tools: + if i in args: + helper_tools_found = True + break + else: + helper_tools_found = False + + if not helper_tools_found and 'sudo' not in args: + return args, args + + prefix = '' + + if helper_tools_found: + args = args.replace('None/archive/coverage', '') + prefix += """ +adjust-ulimits() { + "$@" +} +ceph-coverage() { + "$@" +} +""" + log.debug('Helper tools like adjust-ulimits and ceph-coverage ' + 'were omitted from the following cmd args before ' + 'logging and execution; check vstart_runner.py for ' + 'more details.') + + first_arg = args[ : args.find(' ')] + # We'll let sudo be a part of command even omit flag says otherwise in + # cases of commands which can normally be ran only by root. + last_arg = args[args.rfind(' ') + 1 : ] + # XXX: should sudo be omitted/allowed by default in cases similar to + # that of "exec sudo" as well? 
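+        # For example, "sudo chown bob somefile" must keep the real sudo
+        # (changing ownership genuinely needs root), whereas ordinary test
+        # commands fall through to the no-op sudo() shell function injected
+        # below when omit_sudo is True.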
+ if 'sudo' in args: + for x in ('passwd', 'chown'): + if x == first_arg or x == last_arg or f' {x} ' in args: + omit_sudo = False + + if omit_sudo: + prefix += """ +sudo() { + "$@" +} +""" + log.debug('"sudo" was omitted from the following cmd args ' + 'before execution and logging using function ' + 'overriding; check vstart_runner.py for more details.') + + # usr_args = args passed by the user/caller of this method + usr_args, args = args, prefix + args + + return usr_args, args + + def _perform_checks_and_adjustments(self, args, omit_sudo): + if isinstance(args, list): + args = quote(args) + + assert isinstance(args, str) + + first_arg = args[ : args.find(' ')] + if '/' not in first_arg: + local_bin = os.path.join(BIN_PREFIX, first_arg) + if os.path.exists(local_bin): + args = args.replace(first_arg, local_bin, 1) + + usr_args, args = self._omit_cmd_args(args, omit_sudo) + + log.debug('> ' + usr_args) + + return args, usr_args + + # Wrapper to keep the interface exactly same as that of + # teuthology.remote.run. + def run(self, **kwargs): + return self._do_run(**kwargs) + + # XXX: omit_sudo is set to True since using sudo can change the ownership + # of files which becomes problematic for following executions of + # vstart_runner.py. + # XXX: omit_sudo is re-set to False even in cases of commands like passwd + # and chown. + # XXX: "adjust-ulimits", "ceph-coverage" and "sudo" in command arguments + # are overridden. Former two usually have no applicability for test runs + # on developer's machines and see note point on "omit_sudo" to know more + # about overriding of "sudo". + # XXX: the presence of binary file named after the first argument is + # checked in build/bin/, if present the first argument is replaced with + # the path to binary file. 
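+    # A minimal usage sketch (assuming it is run from the build directory):
+    # both of the following resolve "ceph" to ./bin/ceph before execution.
+    #
+    #   LocalRemote().run(args=['ceph', 'status'], stdout=StringIO())
+    #   LocalRemote().run(args='ceph status', stdout=StringIO())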
+ def _do_run(self, args, check_status=True, wait=True, stdout=None, + stderr=None, cwd=None, stdin=None, logger=None, label=None, + env=None, timeout=None, omit_sudo=True, shell=True, quiet=False): + args, usr_args = self._perform_checks_and_adjustments(args, omit_sudo) + + subproc = launch_subprocess(args, cwd, env, shell) + + if stdin: + # Hack: writing to stdin is not deadlock-safe, but it "always" works + # as long as the input buffer is "small" + if isinstance(stdin, str): + subproc.stdin.write(stdin.encode()) + elif stdin == subprocess.PIPE or stdin == PIPE: + pass + elif isinstance(stdin, StringIO): + subproc.stdin.write(bytes(stdin.getvalue(),encoding='utf8')) + else: + subproc.stdin.write(stdin.getvalue()) + + proc = LocalRemoteProcess( + args, subproc, check_status, + stdout, stderr, usr_args + ) + + if wait: + proc.wait() + + return proc + +class LocalDaemon(object): + def __init__(self, daemon_type, daemon_id): + self.daemon_type = daemon_type + self.daemon_id = daemon_id + self.controller = LocalRemote() + self.proc = None + + @property + def remote(self): + return LocalRemote() + + def running(self): + return self._get_pid() is not None + + def check_status(self): + if self.proc: + return self.proc.poll() + + def _get_pid(self): + """ + Return PID as an integer or None if not found + """ + ps_txt = self.controller.run(args=["ps", "ww", "-u"+str(os.getuid())], + stdout=StringIO()).\ + stdout.getvalue().strip() + lines = ps_txt.split("\n")[1:] + + for line in lines: + if line.find("ceph-{0} -i {1}".format(self.daemon_type, self.daemon_id)) != -1: + log.debug("Found ps line for daemon: {0}".format(line)) + return int(line.split()[0]) + if not opt_log_ps_output: + ps_txt = '(omitted)' + log.debug("No match for {0} {1}: {2}".format( + self.daemon_type, self.daemon_id, ps_txt)) + return None + + def wait(self, timeout): + waited = 0 + while self._get_pid() is not None: + if waited > timeout: + raise MaxWhileTries("Timed out waiting for daemon {0}.{1}".format(self.daemon_type, self.daemon_id)) + time.sleep(1) + waited += 1 + + def stop(self, timeout=300): + if not self.running(): + log.error('tried to stop a non-running daemon') + return + + pid = self._get_pid() + if pid is None: + return + log.debug("Killing PID {0} for {1}.{2}".format(pid, self.daemon_type, self.daemon_id)) + os.kill(pid, signal.SIGTERM) + + waited = 0 + while pid is not None: + new_pid = self._get_pid() + if new_pid is not None and new_pid != pid: + log.debug("Killing new PID {0}".format(new_pid)) + pid = new_pid + os.kill(pid, signal.SIGTERM) + + if new_pid is None: + break + else: + if waited > timeout: + raise MaxWhileTries( + "Timed out waiting for daemon {0}.{1}".format( + self.daemon_type, self.daemon_id)) + time.sleep(1) + waited += 1 + + self.wait(timeout=timeout) + + def restart(self): + if self._get_pid() is not None: + self.stop() + + self.proc = self.controller.run(args=[ + os.path.join(BIN_PREFIX, "ceph-{0}".format(self.daemon_type)), + "-i", self.daemon_id]) + + def signal(self, sig, silent=False): + if not self.running(): + raise RuntimeError("Can't send signal to non-running daemon") + + os.kill(self._get_pid(), sig) + if not silent: + log.debug("Sent signal {0} to {1}.{2}".format(sig, self.daemon_type, self.daemon_id)) + + +def safe_kill(pid): + """ + os.kill annoyingly raises exception if process already dead. Ignore it. 
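+
+    The kill is issued as "sudo kill -9" through the module-level remote so
+    that daemons launched with sudo (e.g. a root-owned ceph-fuse) can be
+    terminated as well.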
+ """ + try: + return remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}', + omit_sudo=False) + except OSError as e: + if e.errno == errno.ESRCH: + # Raced with process termination + pass + else: + raise + +def mon_in_localhost(config_path="./ceph.conf"): + """ + If the ceph cluster is using the localhost IP as mon host, will must disable ns unsharing + """ + with open(config_path) as f: + for line in f: + local = re.match(r'^\s*mon host\s*=\s*\[((v1|v2):127\.0\.0\.1:\d+,?)+\]', line) + if local: + return True + return False + +class LocalCephFSMount(): + @property + def config_path(self): + return "./ceph.conf" + + def get_keyring_path(self): + # This is going to end up in a config file, so use an absolute path + # to avoid assumptions about daemons' pwd + keyring_path = "./client.{0}.keyring".format(self.client_id) + try: + os.stat(keyring_path) + except OSError: + return os.path.join(os.getcwd(), 'keyring') + else: + return keyring_path + + @property + def _prefix(self): + return BIN_PREFIX + + def _asok_path(self): + # In teuthology, the asok is named after the PID of the ceph-fuse + # process, because it's run foreground. When running it daemonized + # however, the asok is named after the PID of the launching process, + # not the long running ceph-fuse process. Therefore we need to give + # an exact path here as the logic for checking /proc/ for which asok + # is alive does not work. + + # Load the asok path from ceph.conf as vstart.sh now puts admin sockets + # in a tmpdir. All of the paths are the same, so no need to select + # based off of the service type. + d = "./asok" + with open(self.config_path) as f: + for line in f: + asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line) + if asok_conf: + d = asok_conf.groups(1)[0] + break + path = "{0}/client.{1}.*.asok".format(d, self.client_id) + return path + + def _run_python(self, pyscript, py_version='python', sudo=False): + """ + Override this to remove the daemon-helper prefix that is used otherwise + to make the process killable. + """ + args = [] + if sudo: + args.append('sudo') + args += [py_version, '-c', pyscript] + return self.client_remote.run(args=args, wait=False, + stdout=StringIO(), omit_sudo=(not sudo)) + + def setup_netns(self): + if opt_use_ns: + super(type(self), self).setup_netns() + + @property + def _nsenter_args(self): + if opt_use_ns: + return super(type(self), self)._nsenter_args + else: + return [] + + def setupfs(self, name=None): + if name is None and self.fs is not None: + # Previous mount existed, reuse the old name + name = self.fs.name + self.fs = LocalFilesystem(self.ctx, name=name) + log.info('Wait for MDS to reach steady state...') + self.fs.wait_for_daemons() + log.info('Ready to start {}...'.format(type(self).__name__)) + + def is_blocked(self): + self.fs = LocalFilesystem(self.ctx, name=self.cephfs_name) + + output = self.fs.mon_manager.raw_cluster_cmd(args='osd blocklist ls') + return self.addr in output + + +class LocalKernelMount(LocalCephFSMount, KernelMount): + def __init__(self, ctx, test_dir, client_id=None, + client_keyring_path=None, client_remote=None, + hostfs_mntpt=None, cephfs_name=None, cephfs_mntpt=None, + brxnet=None): + super(LocalKernelMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_keyring_path=client_keyring_path, + client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + + # Make vstart_runner compatible with teuth and qa/tasks/cephfs. 
+ self._mount_bin = [os.path.join(BIN_PREFIX , 'mount.ceph')] + + def get_global_addr(self): + self.get_global_inst() + self.addr = self.inst[self.inst.find(' ') + 1 : ] + return self.addr + + def get_global_inst(self): + clients = self.client_remote.run( + args=f'{CEPH_CMD} tell mds.* session ls', + stdout=StringIO()).stdout.getvalue() + clients = loads(clients) + for c in clients: + if c['id'] == self.id: + self.inst = c['inst'] + return self.inst + + +class LocalFuseMount(LocalCephFSMount, FuseMount): + def __init__(self, ctx, test_dir, client_id, client_keyring_path=None, + client_remote=None, hostfs_mntpt=None, cephfs_name=None, + cephfs_mntpt=None, brxnet=None): + super(LocalFuseMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_keyring_path=client_keyring_path, + client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + + # Following block makes tests meant for teuthology compatible with + # vstart_runner. + self._mount_bin = [os.path.join(BIN_PREFIX, 'ceph-fuse')] + self._mount_cmd_cwd, self._mount_cmd_logger, \ + self._mount_cmd_stdin = None, None, None + + # XXX: CephFSMount._create_mntpt() sets mountpoint's permission mode to + # 0000 which doesn't work for vstart_runner since superuser privileges are + # not used for mounting Ceph FS with FUSE. + def _create_mntpt(self): + self.client_remote.run(args=f'mkdir -p -v {self.hostfs_mntpt}') + + def _run_mount_cmd(self, mntopts, mntargs, check_status): + retval = super(type(self), self)._run_mount_cmd(mntopts, mntargs, + check_status) + if retval is None: # None represents success + self._set_fuse_daemon_pid(check_status) + return retval + + def _get_mount_cmd(self, mntopts, mntargs): + mount_cmd = super(type(self), self)._get_mount_cmd(mntopts, mntargs) + + if os.getuid() != 0: + mount_cmd += ['--client_die_on_failed_dentry_invalidate=false'] + return mount_cmd + + @property + def _fuse_conn_check_timeout(self): + return 30 + + def _add_valgrind_args(self, mount_cmd): + return [] + + def _set_fuse_daemon_pid(self, check_status): + # NOTE: When a command <args> is launched with sudo, two processes are + # launched, one with sudo in <args> and other without. Make sure we + # get the PID of latter one. + try: + with safe_while(sleep=1, tries=15) as proceed: + while proceed(): + try: + sock = self.find_admin_socket() + except (RuntimeError, CommandFailedError): + continue + + self.fuse_daemon.fuse_pid = int(re.match(".*\.(\d+)\.asok$", + sock).group(1)) + break + except MaxWhileTries: + if check_status: + raise + else: + pass + +# XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of +# the same name. +class LocalCephManager(CephManager): + def __init__(self, ctx=None): + self.ctx = ctx + if self.ctx: + self.cluster = self.ctx.config['cluster'] + + # Deliberately skip parent init, only inheriting from it to get + # util methods like osd_dump that sit on top of raw_cluster_cmd + self.controller = LocalRemote() + + # A minority of CephManager fns actually bother locking for when + # certain teuthology tests want to run tasks in parallel + self.lock = threading.RLock() + + self.log = lambda x: log.debug(x) + + # Don't bother constructing a map of pools: it should be empty + # at test cluster start, and in any case it would be out of date + # in no time. The attribute needs to exist for some of the CephManager + # methods to work though. 
+ self.pools = {} + + # NOTE: These variables are being overriden here so that parent class + # can pick it up. + self.cephadm = False + self.rook = False + self.testdir = None + self.run_ceph_w_prefix = self.run_cluster_cmd_prefix = [CEPH_CMD] + self.CEPH_CMD = [CEPH_CMD] + self.RADOS_CMD = [RADOS_CMD] + + def find_remote(self, daemon_type, daemon_id): + """ + daemon_type like 'mds', 'osd' + daemon_id like 'a', '0' + """ + return LocalRemote() + + def admin_socket(self, daemon_type, daemon_id, command, check_status=True, + timeout=None, stdout=None): + if stdout is None: + stdout = StringIO() + + args=[CEPH_CMD, "daemon", f"{daemon_type}.{daemon_id}"] + command + return self.controller.run(args=args, check_status=check_status, + timeout=timeout, stdout=stdout) + + +class LocalCephCluster(CephCluster): + def __init__(self, ctx): + # Deliberately skip calling CephCluster constructor + self._ctx = ctx + self.mon_manager = LocalCephManager(ctx=self._ctx) + self._conf = defaultdict(dict) + + @property + def admin_remote(self): + return LocalRemote() + + def get_config(self, key, service_type=None): + if service_type is None: + service_type = 'mon' + + # FIXME hardcoded vstart service IDs + service_id = { + 'mon': 'a', + 'mds': 'a', + 'osd': '0' + }[service_type] + + return self.json_asok(['config', 'get', key], service_type, service_id)[key] + + def _write_conf(self): + # In teuthology, we have the honour of writing the entire ceph.conf, but + # in vstart land it has mostly already been written and we need to carefully + # append to it. + conf_path = "./ceph.conf" + banner = "\n#LOCAL_TEST\n" + existing_str = open(conf_path).read() + + if banner in existing_str: + existing_str = existing_str[0:existing_str.find(banner)] + + existing_str += banner + + for subsys, kvs in self._conf.items(): + existing_str += "\n[{0}]\n".format(subsys) + for key, val in kvs.items(): + # Comment out existing instance if it exists + log.debug("Searching for existing instance {0}/{1}".format( + key, subsys + )) + existing_section = re.search("^\[{0}\]$([\n]|[^\[])+".format( + subsys + ), existing_str, re.MULTILINE) + + if existing_section: + section_str = existing_str[existing_section.start():existing_section.end()] + existing_val = re.search("^\s*[^#]({0}) =".format(key), section_str, re.MULTILINE) + if existing_val: + start = existing_section.start() + existing_val.start(1) + log.debug("Found string to replace at {0}".format( + start + )) + existing_str = existing_str[0:start] + "#" + existing_str[start:] + + existing_str += "{0} = {1}\n".format(key, val) + + open(conf_path, "w").write(existing_str) + + def set_ceph_conf(self, subsys, key, value): + self._conf[subsys][key] = value + self._write_conf() + + def clear_ceph_conf(self, subsys, key): + del self._conf[subsys][key] + self._write_conf() + + +class LocalMDSCluster(LocalCephCluster, MDSCluster): + def __init__(self, ctx): + LocalCephCluster.__init__(self, ctx) + # Deliberately skip calling MDSCluster constructor + self._mds_ids = ctx.daemons.daemons['ceph.mds'].keys() + log.debug("Discovered MDS IDs: {0}".format(self._mds_ids)) + self._mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids]) + + @property + def mds_ids(self): + return self._mds_ids + + @property + def mds_daemons(self): + return self._mds_daemons + + def clear_firewall(self): + # FIXME: unimplemented + pass + + def newfs(self, name='cephfs', create=True): + return LocalFilesystem(self._ctx, name=name, create=create) + + def delete_all_filesystems(self): + """ + Remove all 
filesystems that exist, and any pools in use by them. + """ + for fs in self.status().get_filesystems(): + LocalFilesystem(ctx=self._ctx, fscid=fs['id']).destroy() + + +class LocalMgrCluster(LocalCephCluster, MgrCluster): + def __init__(self, ctx): + super(LocalMgrCluster, self).__init__(ctx) + + self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys() + self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids]) + + +class LocalFilesystem(LocalMDSCluster, Filesystem): + def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False): + # Deliberately skip calling Filesystem constructor + LocalMDSCluster.__init__(self, ctx) + + self.id = None + self.name = name + self.metadata_pool_name = None + self.metadata_overlay = False + self.data_pool_name = None + self.data_pools = None + self.fs_config = fs_config + self.ec_profile = fs_config.get('ec_profile') + + self.mon_manager = LocalCephManager(ctx=self._ctx) + + self.client_remote = LocalRemote() + + self._conf = defaultdict(dict) + + if name is not None: + if fscid is not None: + raise RuntimeError("cannot specify fscid when creating fs") + if create and not self.legacy_configured(): + self.create() + else: + if fscid is not None: + self.id = fscid + self.getinfo(refresh=True) + + # Stash a reference to the first created filesystem on ctx, so + # that if someone drops to the interactive shell they can easily + # poke our methods. + if not hasattr(self._ctx, "filesystem"): + self._ctx.filesystem = self + + @property + def _prefix(self): + return BIN_PREFIX + + def set_clients_block(self, blocked, mds_id=None): + raise NotImplementedError() + + +class LocalCluster(object): + def __init__(self, rolename="placeholder"): + self.remotes = { + LocalRemote(): [rolename] + } + + def only(self, requested): + return self.__class__(rolename=requested) + + def run(self, *args, **kwargs): + r = [] + for remote in self.remotes.keys(): + r.append(remote.run(*args, **kwargs)) + return r + + +class LocalContext(object): + def __init__(self): + FSID = remote.run(args=[os.path.join(BIN_PREFIX, 'ceph'), 'fsid'], + stdout=StringIO()).stdout.getvalue() + + cluster_name = 'ceph' + self.config = {'cluster': cluster_name} + self.ceph = {cluster_name: Namespace()} + self.ceph[cluster_name].fsid = FSID + self.teuthology_config = teuth_config + self.cluster = LocalCluster() + self.daemons = DaemonGroup() + if not hasattr(self, 'managers'): + self.managers = {} + self.managers[self.config['cluster']] = LocalCephManager(ctx=self) + + # Shove some LocalDaemons into the ctx.daemons DaemonGroup instance so that any + # tests that want to look these up via ctx can do so. + # Inspect ceph.conf to see what roles exist + for conf_line in open("ceph.conf").readlines(): + for svc_type in ["mon", "osd", "mds", "mgr"]: + prefixed_type = "ceph." + svc_type + if prefixed_type not in self.daemons.daemons: + self.daemons.daemons[prefixed_type] = {} + match = re.match("^\[{0}\.(.+)\]$".format(svc_type), conf_line) + if match: + svc_id = match.group(1) + self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id) + + def __del__(self): + test_path = self.teuthology_config['test_path'] + # opt_create_cluster_only does not create the test path + if test_path: + shutil.rmtree(test_path) + + +######################################### +# +# stuff necessary for launching tests... 
+# +######################################### + + +def enumerate_methods(s): + log.debug("e: {0}".format(s)) + for t in s._tests: + if isinstance(t, suite.BaseTestSuite): + for sub in enumerate_methods(t): + yield sub + else: + yield s, t + + +def load_tests(modules, loader): + if modules: + log.debug("Executing modules: {0}".format(modules)) + module_suites = [] + for mod_name in modules: + # Test names like cephfs.test_auto_repair + module_suites.append(loader.loadTestsFromName(mod_name)) + log.debug("Loaded: {0}".format(list(module_suites))) + return suite.TestSuite(module_suites) + else: + log.debug("Executing all cephfs tests") + return loader.discover( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "cephfs") + ) + + +def scan_tests(modules): + overall_suite = load_tests(modules, loader.TestLoader()) + max_required_mds = 0 + max_required_clients = 0 + max_required_mgr = 0 + require_memstore = False + + for suite_, case in enumerate_methods(overall_suite): + max_required_mds = max(max_required_mds, + getattr(case, "MDSS_REQUIRED", 0)) + max_required_clients = max(max_required_clients, + getattr(case, "CLIENTS_REQUIRED", 0)) + max_required_mgr = max(max_required_mgr, + getattr(case, "MGRS_REQUIRED", 0)) + require_memstore = getattr(case, "REQUIRE_MEMSTORE", False) \ + or require_memstore + + return max_required_mds, max_required_clients, \ + max_required_mgr, require_memstore + + +class LogRotate(): + def __init__(self): + self.conf_file_path = os.path.join(os.getcwd(), 'logrotate.conf') + self.state_file_path = os.path.join(os.getcwd(), 'logrotate.state') + + def run_logrotate(self): + remote.run(args=['logrotate', '-f', self.conf_file_path, '-s', + self.state_file_path, '--verbose']) + + +def teardown_cluster(): + log.info('\ntearing down the cluster...') + try: + remote.run(args=[os.path.join(SRC_PREFIX, "stop.sh")], timeout=60) + except CommandFailedError as e: + log.error('stop.sh failed: %s', e) + log.info('\nceph cluster torn down') + remote.run(args=['rm', '-rf', './dev', './out']) + + +def clear_old_log(): + try: + os.stat(logpath) + except FileNotFoundError: + return + else: + os.remove(logpath) + with open(logpath, 'w') as logfile: + logfile.write('') + init_log(log.level) + log.debug('logging in a fresh file now...') + + +class LogStream(object): + def __init__(self): + self.buffer = "" + self.omit_result_lines = False + + def _del_result_lines(self): + """ + Don't let unittest.TextTestRunner print "Ran X tests in Ys", + vstart_runner.py will do it for itself since it runs tests in a + testsuite one by one. + """ + if self.omit_result_lines: + self.buffer = re.sub('-'*70+'\nran [0-9]* test in [0-9.]*s\n*', + '', self.buffer, flags=re.I) + self.buffer = re.sub('failed \(failures=[0-9]*\)\n', '', self.buffer, + flags=re.I) + self.buffer = self.buffer.replace('OK\n', '') + + def write(self, data): + self.buffer += data + if self.buffer.count("\n") > 5: + self._write() + + def _write(self): + if opt_rotate_logs: + self._del_result_lines() + if self.buffer == '': + return + + lines = self.buffer.split("\n") + for line in lines: + # sys.stderr.write(line + "\n") + log.info(line) + self.buffer = '' + + def flush(self): + pass + + def __del__(self): + self._write() + + +class InteractiveFailureResult(unittest.TextTestResult): + """ + Specialization that implements interactive-on-error style + behavior. 
+ """ + def addFailure(self, test, err): + super(InteractiveFailureResult, self).addFailure(test, err) + log.error(self._exc_info_to_string(err, test)) + log.error("Failure in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=None, config=None) + + def addError(self, test, err): + super(InteractiveFailureResult, self).addError(test, err) + log.error(self._exc_info_to_string(err, test)) + log.error("Error in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=None, config=None) + + +# XXX: class we require would be inherited from this one and one of +# InteractiveFailureResult and unittestunittest.TextTestResult. +class LoggingResultTemplate(object): + fail_on_skip = False + + def startTest(self, test): + log.info("Starting test: {0}".format(self.getDescription(test))) + test.started_at = datetime.datetime.utcnow() + return super(LoggingResultTemplate, self).startTest(test) + + def stopTest(self, test): + log.info("Stopped test: {0} in {1}s".format( + self.getDescription(test), + (datetime.datetime.utcnow() - test.started_at).total_seconds() + )) + + def addSkip(self, test, reason): + if LoggingResultTemplate.fail_on_skip: + # Don't just call addFailure because that requires a traceback + self.failures.append((test, reason)) + else: + super(LoggingResultTemplate, self).addSkip(test, reason) + + +def launch_tests(overall_suite): + if opt_rotate_logs or not opt_exit_on_test_failure: + return launch_individually(overall_suite) + else: + return launch_entire_suite(overall_suite) + + +def get_logging_result_class(): + result_class = InteractiveFailureResult if opt_interactive_on_error else \ + unittest.TextTestResult + return type('', (LoggingResultTemplate, result_class), {}) + + +def launch_individually(overall_suite): + no_of_tests_execed = 0 + no_of_tests_failed, no_of_tests_execed = 0, 0 + LoggingResult = get_logging_result_class() + stream = LogStream() + stream.omit_result_lines = True + if opt_rotate_logs: + logrotate = LogRotate() + + started_at = datetime.datetime.utcnow() + for suite_, case in enumerate_methods(overall_suite): + # don't run logrotate beforehand since some ceph daemons might be + # down and pre/post-rotate scripts in logrotate.conf might fail. 
+ if opt_rotate_logs: + logrotate.run_logrotate() + + result = unittest.TextTestRunner(stream=stream, + resultclass=LoggingResult, + verbosity=2, failfast=True).run(case) + + if not result.wasSuccessful(): + if opt_exit_on_test_failure: + break + else: + no_of_tests_failed += 1 + + no_of_tests_execed += 1 + time_elapsed = (datetime.datetime.utcnow() - started_at).total_seconds() + + if result.wasSuccessful(): + log.info('') + log.info('-'*70) + log.info(f'Ran {no_of_tests_execed} tests successfully in ' + f'{time_elapsed}s') + if no_of_tests_failed > 0: + log.info(f'{no_of_tests_failed} tests failed') + log.info('') + log.info('OK') + + return result + + +def launch_entire_suite(overall_suite): + LoggingResult = get_logging_result_class() + + testrunner = unittest.TextTestRunner(stream=LogStream(), + resultclass=LoggingResult, + verbosity=2, failfast=True) + return testrunner.run(overall_suite) + + +def exec_test(): + # Parse arguments + global opt_interactive_on_error + opt_interactive_on_error = False + opt_create_cluster = False + opt_create_cluster_only = False + opt_ignore_missing_binaries = False + opt_teardown_cluster = False + global opt_log_ps_output + opt_log_ps_output = False + use_kernel_client = False + global opt_use_ns + opt_use_ns = False + opt_brxnet= None + opt_verbose = True + global opt_rotate_logs + opt_rotate_logs = False + global opt_exit_on_test_failure + opt_exit_on_test_failure = True + + args = sys.argv[1:] + flags = [a for a in args if a.startswith("-")] + modules = [a for a in args if not a.startswith("-")] + for f in flags: + if f == "--interactive": + opt_interactive_on_error = True + elif f == "--create": + opt_create_cluster = True + elif f == "--create-cluster-only": + opt_create_cluster_only = True + elif f == "--ignore-missing-binaries": + opt_ignore_missing_binaries = True + elif f == '--teardown': + opt_teardown_cluster = True + elif f == '--log-ps-output': + opt_log_ps_output = True + elif f == '--clear-old-log': + clear_old_log() + elif f == "--kclient": + use_kernel_client = True + elif f == '--usens': + opt_use_ns = True + elif '--brxnet' in f: + if re.search(r'=[0-9./]+', f) is None: + log.error("--brxnet=<ip/mask> option needs one argument: '{0}'".format(f)) + sys.exit(-1) + opt_brxnet=f.split('=')[1] + try: + IP(opt_brxnet) + if IP(opt_brxnet).iptype() == 'PUBLIC': + raise RuntimeError('is public') + except Exception as e: + log.error("Invalid ip '{0}' {1}".format(opt_brxnet, e)) + sys.exit(-1) + elif '--no-verbose' == f: + opt_verbose = False + elif f == '--rotate-logs': + opt_rotate_logs = True + elif f == '--run-all-tests': + opt_exit_on_test_failure = False + elif f == '--debug': + log.setLevel(logging.DEBUG) + else: + log.error("Unknown option '{0}'".format(f)) + sys.exit(-1) + + # Help developers by stopping up-front if their tree isn't built enough for all the + # tools that the tests might want to use (add more here if needed) + require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan", + "cephfs-table-tool", "ceph-fuse", "rados", "cephfs-meta-injection"] + # What binaries may be required is task specific + require_binaries = ["ceph-dencoder", "rados"] + missing_binaries = [b for b in require_binaries if not os.path.exists(os.path.join(BIN_PREFIX, b))] + if missing_binaries and not opt_ignore_missing_binaries: + log.error("Some ceph binaries missing, please build them: {0}".format(" ".join(missing_binaries))) + sys.exit(-1) + + max_required_mds, max_required_clients, \ + max_required_mgr, require_memstore = 
scan_tests(modules) + + global remote + remote = LocalRemote() + + CephFSMount.cleanup_stale_netnses_and_bridge(remote) + + # Tolerate no MDSs or clients running at start + ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())], + stdout=StringIO()).stdout.getvalue().strip() + lines = ps_txt.split("\n")[1:] + for line in lines: + if 'ceph-fuse' in line or 'ceph-mds' in line: + pid = int(line.split()[0]) + log.warning("Killing stray process {0}".format(line)) + remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}', + omit_sudo=False) + + # Fire up the Ceph cluster if the user requested it + if opt_create_cluster or opt_create_cluster_only: + log.info("Creating cluster with {0} MDS daemons".format( + max_required_mds)) + teardown_cluster() + vstart_env = os.environ.copy() + vstart_env["FS"] = "0" + vstart_env["MDS"] = max_required_mds.__str__() + vstart_env["OSD"] = "4" + vstart_env["MGR"] = max(max_required_mgr, 1).__str__() + + args = [ + os.path.join(SRC_PREFIX, "vstart.sh"), + "-n", + "--nolockdep", + ] + if require_memstore: + args.append("--memstore") + + if opt_verbose: + args.append("-d") + + log.info('\nrunning vstart.sh now...') + # usually, i get vstart.sh running completely in less than 100 + # seconds. + remote.run(args=args, env=vstart_env, timeout=(3 * 60)) + log.info('\nvstart.sh finished running') + + # Wait for OSD to come up so that subsequent injectargs etc will + # definitely succeed + LocalCephCluster(LocalContext()).mon_manager.wait_for_all_osds_up(timeout=30) + + if opt_create_cluster_only: + return + + if opt_use_ns and mon_in_localhost() and not opt_create_cluster: + raise RuntimeError("cluster is on localhost; '--usens' option is incompatible. Or you can pass an extra '--create' option to create a new cluster without localhost!") + + # List of client mounts, sufficient to run the selected tests + clients = [i.__str__() for i in range(0, max_required_clients)] + + test_dir = tempfile.mkdtemp() + teuth_config['test_path'] = test_dir + + ctx = LocalContext() + ceph_cluster = LocalCephCluster(ctx) + mds_cluster = LocalMDSCluster(ctx) + mgr_cluster = LocalMgrCluster(ctx) + + # Construct Mount classes + mounts = [] + for client_id in clients: + # Populate client keyring (it sucks to use client.admin for test clients + # because it's awkward to find the logs later) + client_name = "client.{0}".format(client_id) + + if client_name not in open("./keyring").read(): + p = remote.run(args=[CEPH_CMD, "auth", "get-or-create", client_name, + "osd", "allow rw", + "mds", "allow", + "mon", "allow r"], stdout=StringIO()) + + open("./keyring", "at").write(p.stdout.getvalue()) + + if use_kernel_client: + mount = LocalKernelMount(ctx=ctx, test_dir=test_dir, + client_id=client_id, brxnet=opt_brxnet) + else: + mount = LocalFuseMount(ctx=ctx, test_dir=test_dir, + client_id=client_id, brxnet=opt_brxnet) + + mounts.append(mount) + if os.path.exists(mount.hostfs_mntpt): + if mount.is_mounted(): + log.warning("unmounting {0}".format(mount.hostfs_mntpt)) + mount.umount_wait() + else: + os.rmdir(mount.hostfs_mntpt) + + from tasks.cephfs_test_runner import DecoratingLoader + + decorating_loader = DecoratingLoader({ + "ctx": ctx, + "mounts": mounts, + "ceph_cluster": ceph_cluster, + "mds_cluster": mds_cluster, + "mgr_cluster": mgr_cluster, + }) + + # For the benefit of polling tests like test_full -- in teuthology land we set this + # in a .yaml, here it's just a hardcoded thing for the developer's pleasure. 
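+    # (osd_mon_report_interval is set to 5 seconds both at runtime via
+    # injectargs and persistently in ceph.conf, so OSD stat reports reach the
+    # monitors quickly enough for the polling tests.)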
+ remote.run(args=[CEPH_CMD, "tell", "osd.*", "injectargs", "--osd-mon-report-interval", "5"]) + ceph_cluster.set_ceph_conf("osd", "osd_mon_report_interval", "5") + + # Enable override of recovery options if mClock scheduler is active. This is to allow + # current and future tests to modify recovery related limits. This is because by default, + # with mclock enabled, a subset of recovery options are not allowed to be modified. + remote.run(args=[CEPH_CMD, "tell", "osd.*", "injectargs", "--osd-mclock-override-recovery-settings", "true"]) + ceph_cluster.set_ceph_conf("osd", "osd_mclock_override_recovery_settings", "true") + + # Vstart defaults to two segments, which very easily gets a "behind on trimming" health warning + # from normal IO latency. Increase it for running teests. + ceph_cluster.set_ceph_conf("mds", "mds log max segments", "10") + + # Make sure the filesystem created in tests has uid/gid that will let us talk to + # it after mounting it (without having to go root). Set in 'global' not just 'mds' + # so that cephfs-data-scan will pick it up too. + ceph_cluster.set_ceph_conf("global", "mds root ino uid", "%s" % os.getuid()) + ceph_cluster.set_ceph_conf("global", "mds root ino gid", "%s" % os.getgid()) + + # Monkeypatch get_package_version to avoid having to work out what kind of distro we're on + def _get_package_version(remote, pkg_name): + # Used in cephfs tests to find fuse version. Your development workstation *does* have >=2.9, right? + return "2.9" + + import teuthology.packaging + teuthology.packaging.get_package_version = _get_package_version + + overall_suite = load_tests(modules, decorating_loader) + + # Filter out tests that don't lend themselves to interactive running, + victims = [] + for case, method in enumerate_methods(overall_suite): + fn = getattr(method, method._testMethodName) + + drop_test = False + + if hasattr(fn, 'is_for_teuthology') and getattr(fn, 'is_for_teuthology') is True: + drop_test = True + log.warning("Dropping test because long running: {method_id}".format(method_id=method.id())) + + if getattr(fn, "needs_trimming", False) is True: + drop_test = (os.getuid() != 0) + log.warning("Dropping test because client trim unavailable: {method_id}".format(method_id=method.id())) + + if drop_test: + # Don't drop the test if it was explicitly requested in arguments + is_named = False + for named in modules: + if named.endswith(method.id()): + is_named = True + break + + if not is_named: + victims.append((case, method)) + + log.debug("Disabling {0} tests because of is_for_teuthology or needs_trimming".format(len(victims))) + for s, method in victims: + s._tests.remove(method) + + overall_suite = load_tests(modules, loader.TestLoader()) + result = launch_tests(overall_suite) + + CephFSMount.cleanup_stale_netnses_and_bridge(remote) + if opt_teardown_cluster: + teardown_cluster() + + if not result.wasSuccessful(): + # no point in duplicating if we can have multiple failures in same + # run. 
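+        # With --run-all-tests the failures were already logged as they
+        # happened, so the errors are only re-printed when the run stopped at
+        # the first failure.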
+ if opt_exit_on_test_failure: + result.printErrors() # duplicate output at end for convenience + + bad_tests = [] + for test, error in result.errors: + bad_tests.append(str(test)) + for test, failure in result.failures: + bad_tests.append(str(test)) + + sys.exit(-1) + else: + sys.exit(0) + + +if __name__ == "__main__": + exec_test() diff --git a/qa/tasks/watch_notify_same_primary.py b/qa/tasks/watch_notify_same_primary.py new file mode 100644 index 000000000..448fee193 --- /dev/null +++ b/qa/tasks/watch_notify_same_primary.py @@ -0,0 +1,129 @@ + +""" +watch_notify_same_primary task +""" +from io import StringIO +import contextlib +import logging + + +from teuthology.orchestra import run +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run watch_notify_same_primary + + The config should be as follows: + + watch_notify_same_primary: + clients: [client list] + + The client list should contain 1 client + + The test requires 3 osds. + + example: + + tasks: + - ceph: + - watch_notify_same_primary: + clients: [client.0] + - interactive: + """ + log.info('Beginning watch_notify_same_primary...') + assert isinstance(config, dict), \ + "please list clients to run on" + + clients = config.get('clients', ['client.0']) + assert len(clients) == 1 + role = clients[0] + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + (remote,) = ctx.cluster.only(role).remotes.keys() + manager = ctx.managers['ceph'] + manager.raw_cluster_cmd('osd', 'set', 'noout') + + pool = manager.create_pool_with_unique_name() + def obj(n): return "foo-{num}".format(num=n) + def start_watch(n): + remote.run( + args = [ + "rados", + "-p", pool, + "put", + obj(n), + "/etc/resolv.conf"], + logger=log.getChild('watch.{id}'.format(id=n))) + proc = remote.run( + args = [ + "rados", + "-p", pool, + "watch", + obj(n)], + stdin=run.PIPE, + stdout=StringIO(), + stderr=StringIO(), + wait=False) + return proc + + num = 20 + + watches = [start_watch(i) for i in range(num)] + + # wait for them all to register + for i in range(num): + with safe_while() as proceed: + while proceed(): + lines = remote.sh( + ["rados", "-p", pool, "listwatchers", obj(i)]) + num_watchers = lines.count('watcher=') + log.info('i see %d watchers for %s', num_watchers, obj(i)) + if num_watchers >= 1: + break + + def notify(n, msg): + remote.run( + args = [ + "rados", + "-p", pool, + "notify", + obj(n), + msg], + logger=log.getChild('notify.{id}'.format(id=n))) + + [notify(n, 'notify1') for n in range(len(watches))] + + manager.kill_osd(0) + manager.mark_down_osd(0) + + [notify(n, 'notify2') for n in range(len(watches))] + + try: + yield + finally: + log.info('joining watch_notify_stress') + for watch in watches: + watch.stdin.write("\n") + + run.wait(watches) + + for watch in watches: + lines = watch.stdout.getvalue().split("\n") + got1 = False + got2 = False + for l in lines: + if 'notify1' in l: + got1 = True + if 'notify2' in l: + got2 = True + log.info(lines) + assert got1 and got2 + + manager.revive_osd(0) + manager.remove_pool(pool) diff --git a/qa/tasks/watch_notify_stress.py b/qa/tasks/watch_notify_stress.py new file mode 100644 index 000000000..47747b1ca --- /dev/null +++ b/qa/tasks/watch_notify_stress.py @@ -0,0 +1,69 @@ +""" +test_stress_watch task +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology.task import proc_thrasher + +log = logging.getLogger(__name__) + + 
+@contextlib.contextmanager +def task(ctx, config): + """ + Run test_stress_watch + + The config should be as follows: + + test_stress_watch: + clients: [client list] + + example: + + tasks: + - ceph: + - test_stress_watch: + clients: [client.0] + - interactive: + """ + log.info('Beginning test_stress_watch...') + assert isinstance(config, dict), \ + "please list clients to run on" + testwatch = {} + + remotes = [] + + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + remotes.append(remote) + + args =['CEPH_CLIENT_ID={id_}'.format(id_=id_), + 'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')), + 'daemon-helper', + 'kill', + 'multi_stress_watch foo foo' + ] + + log.info("args are %s" % (args,)) + + proc = proc_thrasher.ProcThrasher({}, remote, + args=[run.Raw(i) for i in args], + logger=log.getChild('testwatch.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + proc.start() + testwatch[id_] = proc + + try: + yield + finally: + log.info('joining watch_notify_stress') + for i in testwatch.values(): + i.join() diff --git a/qa/tasks/workunit.py b/qa/tasks/workunit.py new file mode 100644 index 000000000..92c5780f9 --- /dev/null +++ b/qa/tasks/workunit.py @@ -0,0 +1,439 @@ +""" +Workunit task -- Run ceph on sets of specific clients +""" +import logging +import pipes +import os +import re +import shlex + +from tasks.util import get_remote_for_role +from tasks.util.workunit import get_refspec_after_overrides + +from teuthology import misc +from teuthology.config import config as teuth_config +from teuthology.exceptions import CommandFailedError +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run ceph on all workunits found under the specified path. + + For example:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: [direct_io, xattrs.sh] + client.1: [snaps] + branch: foo + + You can also run a list of workunits on all clients: + tasks: + - ceph: + - ceph-fuse: + - workunit: + tag: v0.47 + clients: + all: [direct_io, xattrs.sh, snaps] + + If you have an "all" section it will run all the workunits + on each client simultaneously, AFTER running any workunits specified + for individual clients. (This prevents unintended simultaneous runs.) + + To customize tests, you can specify environment variables as a dict. 
You + can also specify a time limit for each work unit (defaults to 3h): + + tasks: + - ceph: + - ceph-fuse: + - workunit: + sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6 + clients: + all: [snaps] + env: + FOO: bar + BAZ: quux + timeout: 3h + + You can also pass optional arguments to the found workunits: + + tasks: + - workunit: + clients: + all: + - test-ceph-helpers.sh test_get_config + + This task supports roles that include a ceph cluster, e.g.:: + + tasks: + - ceph: + - workunit: + clients: + backup.client.0: [foo] + client.1: [bar] # cluster is implicitly 'ceph' + + You can also specify an alternative top-level dir to 'qa/workunits', like + 'qa/standalone', with:: + + tasks: + - install: + - workunit: + basedir: qa/standalone + clients: + client.0: + - test-ceph-helpers.sh + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict) + assert isinstance(config.get('clients'), dict), \ + 'configuration must contain a dictionary of clients' + + overrides = ctx.config.get('overrides', {}) + refspec = get_refspec_after_overrides(config, overrides) + timeout = config.get('timeout', '3h') + cleanup = config.get('cleanup', True) + + log.info('Pulling workunits from ref %s', refspec) + + created_mountpoint = {} + + if config.get('env') is not None: + assert isinstance(config['env'], dict), 'env must be a dictionary' + clients = config['clients'] + + # Create scratch dirs for any non-all workunits + log.info('Making a separate scratch dir for every client...') + for role in clients.keys(): + assert isinstance(role, str) + if role == "all": + continue + + assert 'client' in role + created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir')) + created_mountpoint[role] = created_mnt_dir + + # Execute any non-all workunits + log.info("timeout={}".format(timeout)) + log.info("cleanup={}".format(cleanup)) + with parallel() as p: + for role, tests in clients.items(): + if role != "all": + p.spawn(_run_tests, ctx, refspec, role, tests, + config.get('env'), + basedir=config.get('basedir','qa/workunits'), + subdir=config.get('subdir'), + timeout=timeout, + cleanup=cleanup, + coverage_and_limits=not config.get('no_coverage_and_limits', None)) + + if cleanup: + # Clean up dirs from any non-all workunits + for role, created in created_mountpoint.items(): + _delete_dir(ctx, role, created) + + # Execute any 'all' workunits + if 'all' in clients: + all_tasks = clients["all"] + _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'), + config.get('basedir', 'qa/workunits'), + config.get('subdir'), timeout=timeout, + cleanup=cleanup) + + +def _client_mountpoint(ctx, cluster, id_): + """ + Returns the path to the expected mountpoint for workunits running + on some kind of filesystem. + """ + # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet, + # only include the cluster name in the dir if the cluster is not 'ceph' + if cluster == 'ceph': + dir_ = 'mnt.{0}'.format(id_) + else: + dir_ = 'mnt.{0}.{1}'.format(cluster, id_) + return os.path.join(misc.get_testdir(ctx), dir_) + + +def _delete_dir(ctx, role, created_mountpoint): + """ + Delete file used by this role, and delete the directory that this + role appeared in. + + :param ctx: Context + :param role: "role.#" where # is used for the role id. 
+ """ + cluster, _, id_ = misc.split_role(role) + remote = get_remote_for_role(ctx, role) + mnt = _client_mountpoint(ctx, cluster, id_) + client = os.path.join(mnt, 'client.{id}'.format(id=id_)) + + # Remove the directory inside the mount where the workunit ran + remote.run( + args=[ + 'sudo', + 'rm', + '-rf', + '--', + client, + ], + ) + log.info("Deleted dir {dir}".format(dir=client)) + + # If the mount was an artificially created dir, delete that too + if created_mountpoint: + remote.run( + args=[ + 'rmdir', + '--', + mnt, + ], + ) + log.info("Deleted artificial mount point {dir}".format(dir=client)) + + +def _make_scratch_dir(ctx, role, subdir): + """ + Make scratch directories for this role. This also makes the mount + point if that directory does not exist. + + :param ctx: Context + :param role: "role.#" where # is used for the role id. + :param subdir: use this subdir (False if not used) + """ + created_mountpoint = False + cluster, _, id_ = misc.split_role(role) + remote = get_remote_for_role(ctx, role) + dir_owner = remote.user + mnt = _client_mountpoint(ctx, cluster, id_) + # if neither kclient nor ceph-fuse are required for a workunit, + # mnt may not exist. Stat and create the directory if it doesn't. + try: + remote.run( + args=[ + 'stat', + '--', + mnt, + ], + ) + log.info('Did not need to create dir {dir}'.format(dir=mnt)) + except CommandFailedError: + remote.run( + args=[ + 'mkdir', + '--', + mnt, + ], + ) + log.info('Created dir {dir}'.format(dir=mnt)) + created_mountpoint = True + + if not subdir: + subdir = 'client.{id}'.format(id=id_) + + if created_mountpoint: + remote.run( + args=[ + 'cd', + '--', + mnt, + run.Raw('&&'), + 'mkdir', + '--', + subdir, + ], + ) + else: + remote.run( + args=[ + # cd first so this will fail if the mount point does + # not exist; pure install -d will silently do the + # wrong thing + 'cd', + '--', + mnt, + run.Raw('&&'), + 'sudo', + 'install', + '-d', + '-m', '0755', + '--owner={user}'.format(user=dir_owner), + '--', + subdir, + ], + ) + + return created_mountpoint + + +def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True): + """ + Make a scratch directory for each client in the cluster, and then for each + test spawn _run_tests() for each role. + + See run_tests() for parameter documentation. + """ + is_client = misc.is_type('client') + client_remotes = {} + created_mountpoint = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if is_client(role): + client_remotes[role] = remote + created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir) + + for unit in tests: + with parallel() as p: + for role, remote in client_remotes.items(): + p.spawn(_run_tests, ctx, refspec, role, [unit], env, + basedir, + subdir, + timeout=timeout) + + # cleanup the generated client directories + if cleanup: + for role, _ in client_remotes.items(): + _delete_dir(ctx, role, created_mountpoint[role]) + + +def _run_tests(ctx, refspec, role, tests, env, basedir, + subdir=None, timeout=None, cleanup=True, + coverage_and_limits=True): + """ + Run the individual test. Create a scratch directory and then extract the + workunits from git. Make the executables, and then run the tests. + Clean up (remove files created) after the tests are finished. + + :param ctx: Context + :param refspec: branch, sha1, or version tag used to identify this + build + :param tests: specific tests specified. + :param env: environment set in yaml file. Could be None. 
+ :param subdir: subdirectory set in yaml file. Could be None + :param timeout: If present, use the 'timeout' command on the remote host + to limit execution time. Must be specified by a number + followed by 's' for seconds, 'm' for minutes, 'h' for + hours, or 'd' for days. If '0' or anything that evaluates + to False is passed, the 'timeout' command is not used. + """ + testdir = misc.get_testdir(ctx) + assert isinstance(role, str) + cluster, type_, id_ = misc.split_role(role) + assert type_ == 'client' + remote = get_remote_for_role(ctx, role) + mnt = _client_mountpoint(ctx, cluster, id_) + # subdir so we can remove and recreate this a lot without sudo + if subdir is None: + scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') + else: + scratch_tmp = os.path.join(mnt, subdir) + clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role) + srcdir = '{cdir}/{basedir}'.format(cdir=clonedir, + basedir=basedir) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + # if we are running an upgrade test, and ceph-ci does not have branches like + # `jewel`, so should use ceph.git as an alternative. + try: + remote.run(logger=log.getChild(role), + args=refspec.clone(git_url, clonedir)) + except CommandFailedError: + if git_url.endswith('/ceph-ci.git'): + alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git') + elif git_url.endswith('/ceph-ci'): + alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url) + else: + raise + log.info( + "failed to check out '%s' from %s; will also try in %s", + refspec, + git_url, + alt_git_url, + ) + remote.run(logger=log.getChild(role), + args=refspec.clone(alt_git_url, clonedir)) + remote.run( + logger=log.getChild(role), + args=[ + 'cd', '--', srcdir, + run.Raw('&&'), + 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', + run.Raw('&&'), + 'find', '-executable', '-type', 'f', '-printf', r'%P\0', + run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)), + ], + ) + + workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role) + workunits = sorted(remote.read_file(workunits_file).decode().split('\0')) + assert workunits + + try: + assert isinstance(tests, list) + for spec in tests: + dir_or_fname, *optional_args = shlex.split(spec) + log.info('Running workunits matching %s on %s...', dir_or_fname, role) + # match executables named "foo" or "foo/*" with workunit named + # "foo" + to_run = [w for w in workunits + if os.path.commonpath([w, dir_or_fname]) == dir_or_fname] + if not to_run: + raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) + for workunit in to_run: + log.info('Running workunit %s...', workunit) + args = [ + 'mkdir', '-p', '--', scratch_tmp, + run.Raw('&&'), + 'cd', '--', scratch_tmp, + run.Raw('&&'), + run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'), + run.Raw('CEPH_REF={ref}'.format(ref=refspec)), + run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), + run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)), + run.Raw('CEPH_ID="{id}"'.format(id=id_)), + run.Raw('PATH=$PATH:/usr/sbin'), + run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)), + run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)), + run.Raw('CEPH_MNT={dir}'.format(dir=mnt)), + ] + if env is not None: + for var, val in env.items(): + quoted_val = pipes.quote(val) + env_arg = '{var}={val}'.format(var=var, val=quoted_val) + args.append(run.Raw(env_arg)) + if coverage_and_limits: + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir)]) + if 
timeout and timeout != '0': + args.extend(['timeout', timeout]) + args.extend([ + '{srcdir}/{workunit}'.format( + srcdir=srcdir, + workunit=workunit, + ), + ]) + remote.run( + logger=log.getChild(role), + args=args + optional_args, + label="workunit test {workunit}".format(workunit=workunit) + ) + if cleanup: + args=['sudo', 'rm', '-rf', '--', scratch_tmp] + remote.run(logger=log.getChild(role), args=args, timeout=(60*60)) + finally: + log.info('Stopping %s on %s...', tests, role) + args=['sudo', 'rm', '-rf', '--', workunits_file, clonedir] + # N.B. don't cleanup scratch_tmp! If the mount is broken then rm will hang. + remote.run( + logger=log.getChild(role), + args=args, + ) diff --git a/qa/test_import.py b/qa/test_import.py new file mode 100644 index 000000000..4ee59b565 --- /dev/null +++ b/qa/test_import.py @@ -0,0 +1,38 @@ +# try to import all .py files from a given directory + +import glob +import os +import importlib +import importlib.util +import pytest + +def _module_name(path): + task = os.path.splitext(path)[0] + parts = task.split(os.path.sep) + package = parts[0] + name = ''.join('.' + c for c in parts[1:]) + return package, name + +def _import_file(path): + package, mod_name = _module_name(path) + line = f'Importing {package}{mod_name} from {path}' + print(f'{line:<80}', end='') + mod_spec = importlib.util.find_spec(mod_name, package) + mod = mod_spec.loader.load_module(f'{package}{mod_name}') + if mod is None: + result = 'FAIL' + else: + result = 'DONE' + print(f'{result:>6}') + mod_spec.loader.exec_module(mod) + return result + +def get_paths(): + for g in ['tasks/**/*.py']: + for p in glob.glob(g, recursive=True): + yield p + +@pytest.mark.parametrize("path", list(sorted(get_paths()))) +def test_import(path): + assert _import_file(path) == 'DONE' + diff --git a/qa/timezone/eastern.yaml b/qa/timezone/eastern.yaml new file mode 100644 index 000000000..019c761e1 --- /dev/null +++ b/qa/timezone/eastern.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + all: + - echo America/New_York | sudo tee /etc/timezone diff --git a/qa/timezone/pacific.yaml b/qa/timezone/pacific.yaml new file mode 100644 index 000000000..6944aa6d8 --- /dev/null +++ b/qa/timezone/pacific.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + all: + - echo America/Los_Angeles | sudo tee /etc/timezone diff --git a/qa/timezone/random.yaml b/qa/timezone/random.yaml new file mode 100644 index 000000000..1d48ce918 --- /dev/null +++ b/qa/timezone/random.yaml @@ -0,0 +1,5 @@ +tasks: +- exec: + all: + - echo America/Los_Angeles | sudo tee /etc/timezone + - [ $RANDOM -gt 32000 ] && echo America/New_York | sudo tee /etc/timezone diff --git a/qa/tox.ini b/qa/tox.ini new file mode 100644 index 000000000..5b2149c49 --- /dev/null +++ b/qa/tox.ini @@ -0,0 +1,46 @@ +[tox] +envlist = flake8, mypy, pytest, deadsymlinks +skipsdist = True + +[testenv] +setenv = + LC_ALL = C.UTF-8 + LANG = C + +[testenv:flake8] +basepython = python3 +deps= + flake8 +commands=flake8 --select=F,E9 --exclude=venv,.tox + +[testenv:mypy] +basepython = python3 +deps = + mypy + types-boto + types-requests + types-jwt + types-paramiko + types-PyYAML + types-cryptography + types-python-dateutil + -c{toxinidir}/../src/mypy-constrains.txt +commands = mypy {posargs:.} + +[testenv:py3] +basepython = python3 +deps = + {env:TEUTHOLOGY_GIT:git+https://github.com/ceph/teuthology.git@main}#egg=teuthology[coverage,orchestra,test] + httplib2 + pytest +commands = + pytest --assert=plain test_import.py + pytest tasks/tests --suite-dir {toxinidir}/suites {posargs} + +[testenv:deadsymlinks] 
+basepython = python3 +toxworkdir = {toxinidir} +allowlist_externals = + bash +commands = + bash -c '! (find . -xtype l | grep ^)' diff --git a/qa/valgrind.supp b/qa/valgrind.supp new file mode 100644 index 000000000..6a5a08f14 --- /dev/null +++ b/qa/valgrind.supp @@ -0,0 +1,703 @@ + +{ + <allthefrees, so we can behave with tcmalloc> + Memcheck:Free + fun:free + ... +} +{ + operator delete[] in Rados::shutdown + Memcheck:Free + fun:_ZdaPvm + ... + fun:_ZN8librados7v14_2_05Rados8shutdownEv +} +{ + older boost mersenne twister uses uninitialized memory for randomness + Memcheck:Cond + ... + fun:*Monitor::prepare_new_fingerprint* + ... +} +{ + older boost mersenne twister uses uninitialized memory for randomness + Memcheck:Value8 + ... + fun:*Monitor::prepare_new_fingerprint* + ... +} +{ + apparent TLS leak in eglibc + Memcheck:Leak + fun:calloc + ... + fun:_dl_allocate_tls + fun:pthread_create* + ... +} +{ + osd: ignore ec plugin loading (FIXME SOMEDAY) + Memcheck:Leak + ... + fun:*ErasureCodePluginRegistry*load* + ... +} +{ + osd: ignore ec plugin factory (FIXME SOMEDAY) + Memcheck:Leak + ... + fun:*ErasureCodePluginRegistry*factory* + ... +} +{ + tcmalloc: libboost_thread-mt.so.1.53 is linked with tcmalloc + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + ... + fun:*tcmalloc*ThreadCache* + ... + obj:/usr/lib64/libboost_thread-mt.so.1.53.0 +} +{ + tcmalloc: msync heap allocation points to uninit bytes (centos 6.5) + Memcheck:Param + msync(start) + obj:/lib64/libpthread-2.12.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to unaddressible bytes (centos 6.5 #2) + Memcheck:Param + msync(start) + obj:/lib64/libpthread-2.12.so + obj:/usr/lib64/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (rhel7) + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (rhel7 #2) + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + obj:/usr/lib64/libtcmalloc.so.4.2.6 + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (wheezy) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.13.so + obj:/usr/lib/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (precise) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.15.so 
+ obj:/usr/lib/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm + obj:/usr/lib/libtcmalloc.so.0.1.0 +} +{ + tcmalloc: msync heap allocation points to uninit bytes (trusty) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.19.so + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes 2 (trusty) + Memcheck:Param + msync(start) + fun:__msync_nocancel + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm + fun:_ZN8tcmalloc15CentralFreeList8PopulateEv + fun:_ZN8tcmalloc15CentralFreeList18FetchFromSpansSafeEv + fun:_ZN8tcmalloc15CentralFreeList11RemoveRangeEPPvS2_i +} +{ + tcmalloc: msync (xenial) + Memcheck:Param + msync(start) + fun:__msync_nocancel + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:*tcmalloc* + fun:*GetStackTrace* +} +{ + tcmalloc: param points to uninit bytes under call_init (jammy) + Memcheck:Param + write(buf) + fun:syscall + obj:*libunwind* + obj:*libunwind* + obj:*libunwind* + obj:*libunwind* + fun:_ULx86_64_step + obj:*tcmalloc* + obj:*tcmalloc* + obj:*tcmalloc* + obj:*tcmalloc* + fun:call_init.part.0 +} +{ + tcmalloc: string + Memcheck:Leak + ... + obj:*tcmalloc* + fun:call_init* + ... +} +{ + ceph global: deliberate onexit leak + Memcheck:Leak + ... + fun:*set_flush_on_exit* + ... +} +{ + libleveldb: ignore all static leveldb leaks + Memcheck:Leak + ... + fun:*leveldb* + ... +} +{ + libleveldb: ignore all dynamic libleveldb leaks + Memcheck:Leak + ... + obj:*libleveldb.so* + ... +} +{ + libcurl: ignore libcurl leaks + Memcheck:Leak + ... + fun:*curl_global_init +} +{ + ignore gnutls leaks + Memcheck:Leak + ... + fun:gnutls_global_init +} +{ + ignore libfcgi leak; OS_LibShutdown has no callers! + Memcheck:Leak + ... + fun:OS_LibInit + fun:FCGX_Init +} +{ + ignore libnss3 leaks + Memcheck:Leak + ... + obj:*libnss3* + ... +} +{ + strptime suckage + Memcheck:Cond + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + strptime suckage 2 + Memcheck:Value8 + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + strptime suckage 3 + Memcheck:Addr8 + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + inet_ntop does something lame on local stack + Memcheck:Value8 + ... + fun:inet_ntop + ... +} +{ + inet_ntop does something lame on local stack + Memcheck:Addr8 + ... + fun:inet_ntop + ... +} +{ + dl-lookup.c thing .. Invalid write of size 8 + Memcheck:Value8 + fun:do_lookup_x + ... + fun:_dl_lookup_symbol_x + ... +} +{ + dl-lookup.c thing .. Invalid write of size 8 + Memcheck:Addr8 + fun:do_lookup_x + ... + fun:_dl_lookup_symbol_x + ... +} +{ + dl-init.c possible lost init + Memcheck:Leak + ... + fun:__trans_list_add + fun:call_init.part.0 + fun:call_init + ... 
+} +{ + weird thing from libc + Memcheck:Leak + ... + fun:*sub_I_comparator* + fun:__libc_csu_init + ... +} +{ + libfuse leak + Memcheck:Leak + ... + fun:fuse_parse_cmdline + ... +} +{ + boost thread leaks on exit + Memcheck:Leak + ... + fun:*boost*detail* + ... + fun:exit +} +{ + lttng appears to not clean up state + Memcheck:Leak + ... + fun:lttng_ust_baddr_statedump_init + fun:lttng_ust_init + fun:call_init.part.0 + ... +} +{ + fun:PK11_CreateContextBySymKey race + Helgrind:Race + obj:/usr/*lib*/libfreebl*3.so + ... + obj:/usr/*lib*/libsoftokn3.so + ... + obj:/usr/*lib*/libnss3.so + fun:PK11_CreateContextBySymKey + ... +} +{ + thread init race + Helgrind:Race + fun:mempcpy + fun:_dl_allocate_tls_init + ... + fun:pthread_create@* + ... +} +{ + thread_local memory is falsely detected (https://svn.boost.org/trac/boost/ticket/3296) + Memcheck:Leak + ... + fun:*boost*detail*get_once_per_thread_epoch* + fun:*boost*call_once* + fun:*boost*detail*get_current_thread_data* + ... +} +{ + rocksdb thread local singletons + Memcheck:Leak + ... + fun:rocksdb::Env::Default() + ... +} +{ + rocksdb column thread local leaks + Memcheck:Leak + ... + fun:rocksdb::ThreadLocalPtr::StaticMeta::SetHandler* + fun:rocksdb::ColumnFamilyData::ColumnFamilyData* + ... +} +{ + rocksdb thread crap + Memcheck:Leak + ... + fun:*ThreadLocalPtr* + ... +} +{ + rocksdb singleton Env leak, blech + Memcheck:Leak + ... + fun:CreateThreadStatusUpdater + fun:PosixEnv + ... +} +{ + rocksdb::Env::Default() + Memcheck:Leak + ... + fun:*rocksdb*Env*Default* + ... +} +{ + rocksdb config parsing + Memcheck:Leak + ... + fun:*rocksdb*Configurable*ParseOption* + ... +} +{ + rocksdb config parsing + Memcheck:Leak + ... + fun:*RocksDBStore*ParseOptionsFromString* + ... +} +{ + rocksdb config parsing + Memcheck:Leak + ... + fun:*RocksDBStore*do_open* + ... +} +{ + rocksdb column family init + Memcheck:Leak + ... + fun:*rocksdb*ColumnFamilyOptions* + ... + fun:*RocksDBStore*init* + ... +} +{ + rocksdb cache get-or-create + Memcheck:Leak + ... + fun:*rocksdb*RegisterCacheDeleterRole* + ... + fun:*rocksdb*GetCacheItemHelperForRole* + ... +} +{ + rocksdb BGThreadWrapper + Memcheck:Leak + ... + fun:*BGThreadWrapper* + ... +} +{ + rocksdb VersionStorageInfo + Memcheck:Leak + ... + fun:*VersionStorageInfo + ... +} +{ + rocksdb version builder + Memcheck:Leak + ... + fun:*rocksdb*VersionBuilder*Rep*LoadTableHandlers* + ... +} +{ + libstdc++ leak on xenial + Memcheck:Leak + fun:malloc + ... + fun:call_init.part.0 + fun:call_init + fun:_dl_init + ... +} +{ + strange leak of std::string memory from md_config_t seen in radosgw + Memcheck:Leak + ... + fun:_ZNSs4_Rep9_S_createEmmRKSaIcE + fun:_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag + ... + fun:_ZN11md_config_tC1Ev + fun:_ZN11CephContextC1Eji + ... +} +{ + python does not reset the member field when dealloc an object + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_InitializeEx + ... +} +{ + statically allocated python types don't get members freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyType_Ready + ... +} +{ + manually constructed python module members don't get freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_InitModule4_64 + ... +} +{ + manually constructed python module members don't get freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyModule_AddObject + ... +} +{ + python subinterpreters may not clean up properly + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_NewInterpreter + ... 
+} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_EvalCode +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyImport_ImportModuleLevel +} +{ + python-owned threads may not full clean up after themselves + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_CallObjectWithKeywords +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_EvalFrameEx + ... + obj:/usr/lib64/libpython2.7.so.1.0 +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyObject_Call +} + +{ + rados cython constants + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:PyObject_Malloc + fun:PyCode_New + fun:__Pyx_InitCachedConstants + fun:initrados + fun:_PyImport_LoadDynamicModule + ... + fun:PyImport_ImportModuleLevel + ... + fun:PyObject_Call + fun:PyEval_CallObjectWithKeywords + fun:PyEval_EvalFrameEx +} + +{ + rbd cython constants + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:PyObject_Malloc + fun:PyCode_New + fun:__Pyx_InitCachedConstants + fun:initrbd + fun:_PyImport_LoadDynamicModule + ... + fun:PyImport_ImportModuleLevel + ... + fun:PyObject_Call + fun:PyEval_CallObjectWithKeywords + fun:PyEval_EvalFrameEx +} + +{ + dlopen() with -lceph-common https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=700899 + Memcheck:Leak + match-leak-kinds: reachable + fun:*alloc + ... + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 +} + +{ + ethdev_init_log thing + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:ethdev_init_log + ... +} + +{ + rte_log_init() in DPDK fails to reset strdup()'ed string at exit + Memcheck:Leak + match-leak-kinds: reachable + fun:*alloc + ... + fun:rte_log_init + fun:__libc_csu_init +} + +{ + libc_csu_init (strdup, rte_log_register, etc.) + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:__libc_csu_init + ... +} + +{ + Boost.Thread fails to call tls_destructor() when the thread exists + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:_Znwm + ... + fun:*boost*detail*set_tss_data* + ... +} + +{ + ignore *all* ceph-mgr python crap. this is overkill, but better than nothing + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py* + ... +} + +{ + something in glibc + Memcheck:Leak + match-leak-kinds: all + ... + fun:strdup + fun:__trans_list_add + ... + fun:_dl_init + ... +} + +# "Conditional jump or move depends on uninitialised value(s)" in OpenSSL +# https://github.com/openssl/openssl/issues/19719 +{ + uninitialized value in gcm_cipher_internal + Memcheck:Cond + ... + fun:gcm_cipher_internal + ... + fun:ossl_gcm_stream_final + fun:EVP_DecryptFinal_ex + ... 
+} +{ + tracker #62141 : valgrind: UninitCondition under __run_exit_handlers + Memcheck:Cond + fun:free + fun:free_res + fun:__libc_freeres + fun:_vgnU_freeres + fun:__run_exit_handlers + fun:exit + fun:(below main) +} + diff --git a/qa/workunits/Makefile b/qa/workunits/Makefile new file mode 100644 index 000000000..f75f5dfd4 --- /dev/null +++ b/qa/workunits/Makefile @@ -0,0 +1,4 @@ +DIRS = direct_io fs + +all: + for d in $(DIRS) ; do ( cd $$d ; $(MAKE) all ) ; done diff --git a/qa/workunits/caps/mon_commands.sh b/qa/workunits/caps/mon_commands.sh new file mode 100755 index 000000000..5b5bce62e --- /dev/null +++ b/qa/workunits/caps/mon_commands.sh @@ -0,0 +1,25 @@ +#!/bin/sh -ex + +ceph-authtool --create-keyring k --gen-key -p --name client.xx +ceph auth add -i k client.xx mon "allow command foo; allow command bar *; allow command baz ...; allow command foo add * mon allow\\ rwx osd allow\\ *" + +( ceph -k k -n client.xx foo || true ) | grep 'unrecog' +( ceph -k k -n client.xx foo ooo || true ) | grep 'Access denied' +( ceph -k k -n client.xx fo || true ) | grep 'Access denied' +( ceph -k k -n client.xx fooo || true ) | grep 'Access denied' + +( ceph -k k -n client.xx bar || true ) | grep 'Access denied' +( ceph -k k -n client.xx bar a || true ) | grep 'unrecog' +( ceph -k k -n client.xx bar a b c || true ) | grep 'Access denied' +( ceph -k k -n client.xx ba || true ) | grep 'Access denied' +( ceph -k k -n client.xx barr || true ) | grep 'Access denied' + +( ceph -k k -n client.xx baz || true ) | grep -v 'Access denied' +( ceph -k k -n client.xx baz a || true ) | grep -v 'Access denied' +( ceph -k k -n client.xx baz a b || true ) | grep -v 'Access denied' + +( ceph -k k -n client.xx foo add osd.1 -i k mon 'allow rwx' osd 'allow *' || true ) | grep 'unrecog' +( ceph -k k -n client.xx foo add osd a b c -i k mon 'allow rwx' osd 'allow *' || true ) | grep 'Access denied' +( ceph -k k -n client.xx foo add osd a b c -i k mon 'allow *' || true ) | grep 'Access denied' + +echo OK
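A note on the cap expectations above (inferred purely from the grep assertions in the test itself, not from separate documentation): 'Access denied' means the monitor rejected the command at the cap check, while 'unrecog' means the cap check passed but the command does not actually exist. Read that way, the test appears to encode roughly the following matching rules:

    allow command foo        # 'foo' only, with no further arguments
    allow command bar *      # 'bar' with exactly one argument, of any value
    allow command baz ...    # 'baz' with any number of arguments

The longer "foo add * mon allow\ rwx osd allow\ *" cap additionally appears to pin some positional arguments to literal values, which is why changing those values in the last block flips the result to 'Access denied'.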
\ No newline at end of file diff --git a/qa/workunits/ceph-helpers-root.sh b/qa/workunits/ceph-helpers-root.sh new file mode 100755 index 000000000..5b5d2b409 --- /dev/null +++ b/qa/workunits/ceph-helpers-root.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +####################################################################### + +function distro_id() { + source /etc/os-release + echo $ID +} + +function distro_version() { + source /etc/os-release + echo $VERSION +} + +function install() { + if [ $(distro_id) = "ubuntu" ]; then + sudo apt-get purge -y gcc + sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + fi + for package in "$@" ; do + install_one $package + done + if [ $(distro_id) = "ubuntu" ]; then + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11 + sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 11 + sudo update-alternatives --set cc /usr/bin/gcc + sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 11 + sudo update-alternatives --set c++ /usr/bin/g++ + fi +} + +function install_one() { + case $(distro_id) in + ubuntu|debian|devuan|softiron) + sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y "$@" + ;; + centos|fedora|rhel) + sudo yum install -y "$@" + ;; + opensuse*|suse|sles) + sudo zypper --non-interactive install "$@" + ;; + *) + echo "$(distro_id) is unknown, $@ will have to be installed manually." + ;; + esac +} + +function install_pkg_on_ubuntu { + local project=$1 + shift + local sha1=$1 + shift + local codename=$1 + shift + local force=$1 + shift + local pkgs=$@ + local missing_pkgs + if [ $force = "force" ]; then + missing_pkgs="$@" + else + for pkg in $pkgs; do + if ! 
dpkg -s $pkg &> /dev/null; then + missing_pkgs+=" $pkg" + fi + done + fi + if test -n "$missing_pkgs"; then + local shaman_url="https://shaman.ceph.com/api/repos/${project}/master/${sha1}/ubuntu/${codename}/repo" + sudo curl --silent --location $shaman_url --output /etc/apt/sources.list.d/$project.list + sudo env DEBIAN_FRONTEND=noninteractive apt-get update -y -o Acquire::Languages=none -o Acquire::Translation=none || true + sudo env DEBIAN_FRONTEND=noninteractive apt-get install --allow-unauthenticated -y $missing_pkgs + fi +} + +####################################################################### + +function control_osd() { + local action=$1 + local id=$2 + + sudo systemctl $action ceph-osd@$id + + return 0 +} + +####################################################################### + +function pool_read_write() { + local size=${1:-1} + local dir=/tmp + local timeout=360 + local test_pool=test_pool + + ceph osd pool delete $test_pool $test_pool --yes-i-really-really-mean-it || return 1 + ceph osd pool create $test_pool 4 || return 1 + ceph osd pool set $test_pool size $size --yes-i-really-mean-it || return 1 + ceph osd pool set $test_pool min_size $size || return 1 + ceph osd pool application enable $test_pool rados + + echo FOO > $dir/BAR + timeout $timeout rados --pool $test_pool put BAR $dir/BAR || return 1 + timeout $timeout rados --pool $test_pool get BAR $dir/BAR.copy || return 1 + diff $dir/BAR $dir/BAR.copy || return 1 + ceph osd pool delete $test_pool $test_pool --yes-i-really-really-mean-it || return 1 +} + +####################################################################### + +set -x + +"$@" diff --git a/qa/workunits/ceph-tests/ceph-admin-commands.sh b/qa/workunits/ceph-tests/ceph-admin-commands.sh new file mode 100755 index 000000000..4a9f0a66f --- /dev/null +++ b/qa/workunits/ceph-tests/ceph-admin-commands.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex + +ceph -s +rados lspools +rbd ls +# check that the monitors work +ceph osd set nodown +ceph osd unset nodown + +exit 0 diff --git a/qa/workunits/cephadm/create_iscsi_disks.sh b/qa/workunits/cephadm/create_iscsi_disks.sh new file mode 100755 index 000000000..45319e3a1 --- /dev/null +++ b/qa/workunits/cephadm/create_iscsi_disks.sh @@ -0,0 +1,36 @@ +#!/bin/bash -ex +# Create some file-backed iSCSI targets and attach them locally. + +# Exit if it's not CentOS +if ! grep -q rhel /etc/*-release; then + echo "The script only supports CentOS." 
+ exit 1 +fi + +[ -z "$SUDO" ] && SUDO=sudo + +# 15 GB +DISK_FILE_SIZE="16106127360" + +$SUDO yum install -y targetcli iscsi-initiator-utils + +TARGET_NAME="iqn.2003-01.org.linux-iscsi.$(hostname).x8664:sn.foobar" +$SUDO targetcli /iscsi create ${TARGET_NAME} +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1/portals delete 0.0.0.0 3260 +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1/portals create 127.0.0.1 3260 +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1 set attribute generate_node_acls=1 +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1 set attribute demo_mode_write_protect=0 + +for i in $(seq 3); do + # Create truncated files, and add them as luns + DISK_FILE="/tmp/disk${i}" + $SUDO truncate --size ${DISK_FILE_SIZE} ${DISK_FILE} + + $SUDO targetcli /backstores/fileio create "lun${i}" ${DISK_FILE} + # Workaround for https://tracker.ceph.com/issues/47758 + $SUDO targetcli "/backstores/fileio/lun${i}" set attribute optimal_sectors=0 + $SUDO targetcli /iscsi/${TARGET_NAME}/tpg1/luns create "/backstores/fileio/lun${i}" +done + +$SUDO iscsiadm -m discovery -t sendtargets -p 127.0.0.1 +$SUDO iscsiadm -m node -p 127.0.0.1 -T ${TARGET_NAME} -l diff --git a/qa/workunits/cephadm/test_adoption.sh b/qa/workunits/cephadm/test_adoption.sh new file mode 100755 index 000000000..68580eb62 --- /dev/null +++ b/qa/workunits/cephadm/test_adoption.sh @@ -0,0 +1,60 @@ +#!/bin/bash -ex + +SCRIPT_NAME=$(basename ${BASH_SOURCE[0]}) +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm +CORPUS_COMMIT=9cd9ad020d93b0b420924fec55da307aff8bd422 + +[ -z "$SUDO" ] && SUDO=sudo + +[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) +trap "$SUDO rm -rf $TMPDIR" EXIT + +if [ -z "$CEPHADM" ]; then + CEPHADM=`mktemp -p $TMPDIR tmp.cephadm.XXXXXX` + ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM" +fi + +# at this point, we need $CEPHADM set +if ! [ -x "$CEPHADM" ]; then + echo "cephadm not found. 
Please set \$CEPHADM" + exit 1 +fi + +# combine into a single var +CEPHADM_BIN="$CEPHADM" +CEPHADM="$SUDO $CEPHADM_BIN" + +## adopt +CORPUS_GIT_SUBMOD="cephadm-adoption-corpus" +GIT_CLONE_DIR=${TMPDIR}/${CORPUS_GIT_SUBMOD} +git clone https://github.com/ceph/$CORPUS_GIT_SUBMOD $GIT_CLONE_DIR + +git -C $GIT_CLONE_DIR checkout $CORPUS_COMMIT +CORPUS_DIR=${GIT_CLONE_DIR}/archive + +for subdir in `ls ${CORPUS_DIR}`; do + for tarfile in `ls ${CORPUS_DIR}/${subdir} | grep .tgz`; do + tarball=${CORPUS_DIR}/${subdir}/${tarfile} + FSID_LEGACY=`echo "$tarfile" | cut -c 1-36` + TMP_TAR_DIR=`mktemp -d -p $TMPDIR` + $SUDO tar xzvf $tarball -C $TMP_TAR_DIR + NAMES=$($CEPHADM ls --legacy-dir $TMP_TAR_DIR | jq -r '.[].name') + for name in $NAMES; do + $CEPHADM adopt \ + --style legacy \ + --legacy-dir $TMP_TAR_DIR \ + --name $name + # validate after adopt + out=$($CEPHADM ls | jq '.[]' \ + | jq 'select(.name == "'$name'")') + echo $out | jq -r '.style' | grep 'cephadm' + echo $out | jq -r '.fsid' | grep $FSID_LEGACY + done + # clean-up before next iter + $CEPHADM rm-cluster --fsid $FSID_LEGACY --force + $SUDO rm -rf $TMP_TAR_DIR + done +done + +echo "OK" diff --git a/qa/workunits/cephadm/test_cephadm.sh b/qa/workunits/cephadm/test_cephadm.sh new file mode 100755 index 000000000..7d06a3326 --- /dev/null +++ b/qa/workunits/cephadm/test_cephadm.sh @@ -0,0 +1,474 @@ +#!/bin/bash -ex + +SCRIPT_NAME=$(basename ${BASH_SOURCE[0]}) +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# cleanup during exit +[ -z "$CLEANUP" ] && CLEANUP=true + +FSID='00000000-0000-0000-0000-0000deadbeef' + +# images that are used +IMAGE_MAIN=${IMAGE_MAIN:-'quay.ceph.io/ceph-ci/ceph:main'} +IMAGE_PACIFIC=${IMAGE_PACIFIC:-'quay.ceph.io/ceph-ci/ceph:pacific'} +#IMAGE_OCTOPUS=${IMAGE_OCTOPUS:-'quay.ceph.io/ceph-ci/ceph:octopus'} +IMAGE_DEFAULT=${IMAGE_MAIN} + +OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img" +OSD_IMAGE_SIZE='6G' +OSD_TO_CREATE=2 +OSD_VG_NAME=${SCRIPT_NAME%.*} +OSD_LV_NAME=${SCRIPT_NAME%.*} + +# TMPDIR for test data +[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) +[ -d "$TMPDIR_TEST_MULTIPLE_MOUNTS" ] || TMPDIR_TEST_MULTIPLE_MOUNTS=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) + +CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm +CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples + +[ -z "$SUDO" ] && SUDO=sudo + +# If cephadm is already installed on the system, use that one, avoid building +# # one if we can. +if [ -z "$CEPHADM" ] && command -v cephadm >/dev/null ; then + CEPHADM="$(command -v cephadm)" +fi + +if [ -z "$CEPHADM" ]; then + CEPHADM=`mktemp -p $TMPDIR tmp.cephadm.XXXXXX` + ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM" + NO_BUILD_INFO=1 +fi + +# at this point, we need $CEPHADM set +if ! [ -x "$CEPHADM" ]; then + echo "cephadm not found. Please set \$CEPHADM" + exit 1 +fi + +# add image to args +CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_DEFAULT" + +# combine into a single var +CEPHADM_BIN="$CEPHADM" +CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS" + +# clean up previous run(s)? +$CEPHADM rm-cluster --fsid $FSID --force +$SUDO vgchange -an $OSD_VG_NAME || true +loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}') +if ! [ "$loopdev" = "" ]; then + $SUDO losetup -d $loopdev +fi + +function cleanup() +{ + if [ $CLEANUP = false ]; then + # preserve the TMPDIR state + echo "========================" + echo "!!! CLEANUP=$CLEANUP !!!" 
+ echo + echo "TMPDIR=$TMPDIR" + echo "========================" + return + fi + + dump_all_logs $FSID + rm -rf $TMPDIR +} +trap cleanup EXIT + +function expect_false() +{ + set -x + if eval "$@"; then return 1; else return 0; fi +} + +# expect_return_code $expected_code $command ... +function expect_return_code() +{ + set -x + local expected_code="$1" + shift + local command="$@" + + set +e + eval "$command" + local return_code="$?" + set -e + + if [ ! "$return_code" -eq "$expected_code" ]; then return 1; else return 0; fi +} + +function is_available() +{ + local name="$1" + local condition="$2" + local tries="$3" + + local num=0 + while ! eval "$condition"; do + num=$(($num + 1)) + if [ "$num" -ge $tries ]; then + echo "$name is not available" + false + fi + sleep 5 + done + + echo "$name is available" + true +} + +function dump_log() +{ + local fsid="$1" + local name="$2" + local num_lines="$3" + + if [ -z $num_lines ]; then + num_lines=100 + fi + + echo '-------------------------' + echo 'dump daemon log:' $name + echo '-------------------------' + + $CEPHADM logs --fsid $fsid --name $name -- --no-pager -n $num_lines +} + +function dump_all_logs() +{ + local fsid="$1" + local names=$($CEPHADM ls | jq -r '.[] | select(.fsid == "'$fsid'").name') + + echo 'dumping logs for daemons: ' $names + for name in $names; do + dump_log $fsid $name + done +} + +function nfs_stop() +{ + # stop the running nfs server + local units="nfs-server nfs-kernel-server" + for unit in $units; do + if systemctl --no-pager status $unit > /dev/null; then + $SUDO systemctl stop $unit + fi + done + + # ensure the NFS port is no longer in use + expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN" +} + +## prepare + check host +$SUDO $CEPHADM check-host + +## run a gather-facts (output to stdout) +$SUDO $CEPHADM gather-facts + +## NOTE: cephadm version is, as of around May 2023, no longer basing the +## output for `cephadm version` on the version of the containers. The version +## reported is that of the "binary" and is determined during the ceph build. +## `cephadm version` should NOT require sudo/root. +$CEPHADM_BIN version +$CEPHADM_BIN version | grep 'cephadm version' +# Typically cmake should be running the cephadm build script with CLI arguments +# that embed version info into the "binary". If not using a cephadm build via +# cmake you can set `NO_BUILD_INFO` to skip this check. +if [ -z "$NO_BUILD_INFO" ]; then + $CEPHADM_BIN version | grep -v 'UNSET' + $CEPHADM_BIN version | grep -v 'UNKNOWN' +fi + + +## test shell before bootstrap, when crash dir isn't (yet) present on this host +$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version' +$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR + +# test stdin +echo foo | $CEPHADM shell -- cat | grep -q foo + +# the shell commands a bit above this seems to cause the +# /var/lib/ceph/<fsid> directory to be made. Since we now +# check in bootstrap that there are no clusters with the same +# fsid based on the directory existing, we need to make sure +# this directory is gone before bootstrapping. 
We can +# accomplish this with another rm-cluster +$CEPHADM rm-cluster --fsid $FSID --force + +## bootstrap +ORIG_CONFIG=`mktemp -p $TMPDIR` +CONFIG=`mktemp -p $TMPDIR` +MONCONFIG=`mktemp -p $TMPDIR` +KEYRING=`mktemp -p $TMPDIR` +IP=127.0.0.1 +cat <<EOF > $ORIG_CONFIG +[global] + log to file = true + osd crush chooseleaf type = 0 +EOF +$CEPHADM bootstrap \ + --mon-id a \ + --mgr-id x \ + --mon-ip $IP \ + --fsid $FSID \ + --config $ORIG_CONFIG \ + --output-config $CONFIG \ + --output-keyring $KEYRING \ + --output-pub-ssh-key $TMPDIR/ceph.pub \ + --allow-overwrite \ + --skip-mon-network \ + --skip-monitoring-stack +test -e $CONFIG +test -e $KEYRING +rm -f $ORIG_CONFIG + +$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log +$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log + +for u in ceph.target \ + ceph-$FSID.target \ + ceph-$FSID@mon.a \ + ceph-$FSID@mgr.x; do + systemctl is-enabled $u + systemctl is-active $u +done +systemctl | grep system-ceph | grep -q .slice # naming is escaped and annoying + +# check ceph -s works (via shell w/ passed config/keyring) +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph -s | grep $FSID + +for t in mon mgr node-exporter prometheus grafana; do + $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch apply $t --unmanaged +done + +## ls +$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \ + | grep $FSID +$CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \ + | grep $FSID + +# make sure the version is returned correctly +$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\. + +## deploy +# add mon.b +cp $CONFIG $MONCONFIG +echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG +jq --null-input \ + --arg fsid $FSID \ + --arg name mon.b \ + --arg keyring /var/lib/ceph/$FSID/mon.a/keyring \ + --arg config "$MONCONFIG" \ + '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config}}' | \ + $CEPHADM _orch deploy +for u in ceph-$FSID@mon.b; do + systemctl is-enabled $u + systemctl is-active $u +done +cond="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph mon stat | grep '2 mons'" +is_available "mon.b" "$cond" 30 + +# add mgr.y +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph auth get-or-create mgr.y \ + mon 'allow profile mgr' \ + osd 'allow *' \ + mds 'allow *' > $TMPDIR/keyring.mgr.y +jq --null-input \ + --arg fsid $FSID \ + --arg name mgr.y \ + --arg keyring $TMPDIR/keyring.mgr.y \ + --arg config "$CONFIG" \ + '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config}}' | \ + $CEPHADM _orch deploy +for u in ceph-$FSID@mgr.y; do + systemctl is-enabled $u + systemctl is-active $u +done + +for f in `seq 1 30`; do + if $CEPHADM shell --fsid $FSID \ + --config $CONFIG --keyring $KEYRING -- \ + ceph -s -f json-pretty \ + | jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi + sleep 1 +done +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph -s -f json-pretty \ + | jq '.mgrmap.num_standbys' | grep -q 1 + +# add osd.{1,2,..} +dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE +loop_dev=$($SUDO losetup -f) +$SUDO vgremove -f $OSD_VG_NAME || true +$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME +$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev + +# osd bootstrap keyring +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd + 
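+# [editor's note, not part of the original script] The next two loops turn
+# each LV into a running OSD: ceph-volume prepares the LV with --no-systemd
+# (presumably so that cephadm, rather than ceph-volume, manages the unit),
+# the osd id and osd fsid assigned by prepare are recovered by parsing
+# `ceph-volume lvm list --format json` with jq, and both are then fed to
+# `cephadm _orch deploy` as JSON on stdin, the same pattern used for mon.b
+# and mgr.y above.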
+# create lvs first so ceph-volume doesn't overlap with lv creation +for id in `seq 0 $((--OSD_TO_CREATE))`; do + $SUDO lvcreate -l $((100/$OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME +done + +for id in `seq 0 $((--OSD_TO_CREATE))`; do + device_name=/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id + CEPH_VOLUME="$CEPHADM ceph-volume \ + --fsid $FSID \ + --config $CONFIG \ + --keyring $TMPDIR/keyring.bootstrap.osd --" + + # prepare the osd + $CEPH_VOLUME lvm prepare --bluestore --data $device_name --no-systemd + $CEPH_VOLUME lvm batch --no-auto $device_name --yes --no-systemd + + # osd id and osd fsid + $CEPH_VOLUME lvm list --format json $device_name > $TMPDIR/osd.map + osd_id=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_id"? | select(.)') + osd_fsid=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_fsid"? | select(.)') + + # deploy the osd + jq --null-input \ + --arg fsid $FSID \ + --arg name osd.$osd_id \ + --arg keyring $TMPDIR/keyring.bootstrap.osd \ + --arg config "$CONFIG" \ + --arg osd_fsid $osd_fsid \ + '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config, "osd_fsid": $osd_fsid}}' | \ + $CEPHADM _orch deploy +done + +# add node-exporter +jq --null-input \ + --arg fsid $FSID \ + --arg name node-exporter.a \ + '{"fsid": $fsid, "name": $name}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'" +is_available "node-exporter" "$cond" 10 + +# add prometheus +jq --null-input \ + --arg fsid $FSID \ + --arg name prometheus.a \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/prometheus.json)" \ + '{"fsid": $fsid, "name": $name, "config_blobs": $config_blobs}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="curl 'localhost:9095/api/v1/query?query=up'" +is_available "prometheus" "$cond" 10 + +# add grafana +jq --null-input \ + --arg fsid $FSID \ + --arg name grafana.a \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/grafana.json)" \ + '{"fsid": $fsid, "name": $name, "config_blobs": $config_blobs}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'" +is_available "grafana" "$cond" 50 + +# add nfs-ganesha +nfs_stop +nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]') +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph osd pool create $nfs_rados_pool 64 +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch pause +jq --null-input \ + --arg fsid $FSID \ + --arg name nfs.a \ + --arg keyring "$KEYRING" \ + --arg config "$CONFIG" \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json)" \ + '{"fsid": $fsid, "name": $name, "params": {"keyring": $keyring, "config": $config}, "config_blobs": $config_blobs}' | \ + ${CEPHADM} _orch deploy +cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'" +is_available "nfs" "$cond" 10 +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch resume + +# add alertmanager via custom container +alertmanager_image=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.image') +tcp_ports=$(jq .ports ${CEPHADM_SAMPLES_DIR}/custom_container.json) +jq --null-input \ + --arg fsid $FSID \ + --arg name container.alertmanager.a \ + --arg keyring $TMPDIR/keyring.bootstrap.osd \ + --arg config "$CONFIG" \ + --arg image 
"$alertmanager_image" \ + --argjson tcp_ports "${tcp_ports}" \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json)" \ + '{"fsid": $fsid, "name": $name, "image": $image, "params": {"keyring": $keyring, "config": $config, "tcp_ports": $tcp_ports}, "config_blobs": $config_blobs}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="$CEPHADM enter --fsid $FSID --name container.alertmanager.a -- test -f \ + /etc/alertmanager/alertmanager.yml" +is_available "alertmanager.yml" "$cond" 10 +cond="curl 'http://localhost:9093' | grep -q 'Alertmanager'" +is_available "alertmanager" "$cond" 10 + +## run +# WRITE ME + +## unit +$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled +$CEPHADM unit --fsid $FSID --name mon.a -- is-active +expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active +$CEPHADM unit --fsid $FSID --name mon.a -- disable +expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled +$CEPHADM unit --fsid $FSID --name mon.a -- enable +$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled +$CEPHADM unit --fsid $FSID --name mon.a -- status +$CEPHADM unit --fsid $FSID --name mon.a -- stop +expect_return_code 3 $CEPHADM unit --fsid $FSID --name mon.a -- status +$CEPHADM unit --fsid $FSID --name mon.a -- start + +## shell +$CEPHADM shell --fsid $FSID -- true +$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph +expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60 +$CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10 +$CEPHADM shell --fsid $FSID --mount $TMPDIR $TMPDIR_TEST_MULTIPLE_MOUNTS -- stat /mnt/$(basename $TMPDIR) + +## enter +expect_false $CEPHADM enter +$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a +$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x +$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon +expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon +$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr +# this triggers a bug in older versions of podman, including 18.04's 1.6.2 +#expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30 +$CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10 + +## ceph-volume +$CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \ + | jq '.[]' + +## preserve test state +[ $CLEANUP = false ] && exit 0 + +## rm-daemon +# mon and osd require --force +expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a +# mgr does not +$CEPHADM rm-daemon --fsid $FSID --name mgr.x + +expect_false $CEPHADM zap-osds --fsid $FSID +$CEPHADM zap-osds --fsid $FSID --force + +## rm-cluster +expect_false $CEPHADM rm-cluster --fsid $FSID --zap-osds +$CEPHADM rm-cluster --fsid $FSID --force --zap-osds + +echo PASS diff --git a/qa/workunits/cephadm/test_dashboard_e2e.sh b/qa/workunits/cephadm/test_dashboard_e2e.sh new file mode 100755 index 000000000..32e0bcc77 --- /dev/null +++ b/qa/workunits/cephadm/test_dashboard_e2e.sh @@ -0,0 +1,107 @@ +#!/bin/bash -ex + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +DASHBOARD_FRONTEND_DIR=${SCRIPT_DIR}/../../../src/pybind/mgr/dashboard/frontend + +[ -z "$SUDO" ] && SUDO=sudo + +install_common () { + NODEJS_VERSION="16" + if grep -q debian /etc/*-release; then + $SUDO apt-get update + # https://github.com/nodesource/distributions#manual-installation + $SUDO apt-get install curl gpg + KEYRING=/usr/share/keyrings/nodesource.gpg + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key | gpg --dearmor | $SUDO tee 
"$KEYRING" >/dev/null + DISTRO="$(source /etc/lsb-release; echo $DISTRIB_CODENAME)" + VERSION="node_$NODEJS_VERSION.x" + echo "deb [signed-by=$KEYRING] https://deb.nodesource.com/$VERSION $DISTRO main" | $SUDO tee /etc/apt/sources.list.d/nodesource.list + echo "deb-src [signed-by=$KEYRING] https://deb.nodesource.com/$VERSION $DISTRO main" | $SUDO tee -a /etc/apt/sources.list.d/nodesource.list + $SUDO apt-get update + $SUDO apt-get install nodejs + elif grep -q rhel /etc/*-release; then + $SUDO yum module -y enable nodejs:$NODEJS_VERSION + $SUDO yum install -y jq npm + else + echo "Unsupported distribution." + exit 1 + fi +} + +install_chrome () { + if grep -q debian /etc/*-release; then + $SUDO bash -c 'echo "deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list' + curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | $SUDO apt-key add - + $SUDO apt-get update + $SUDO apt-get install -y google-chrome-stable + $SUDO apt-get install -y xvfb + $SUDO rm /etc/apt/sources.list.d/google-chrome.list + elif grep -q rhel /etc/*-release; then + $SUDO dd of=/etc/yum.repos.d/google-chrome.repo status=none <<EOF +[google-chrome] +name=google-chrome +baseurl=https://dl.google.com/linux/chrome/rpm/stable/\$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://dl-ssl.google.com/linux/linux_signing_key.pub +EOF + $SUDO yum install -y google-chrome-stable + $SUDO rm /etc/yum.repos.d/google-chrome.repo + # Cypress dependencies + $SUDO yum install -y xorg-x11-server-Xvfb gtk2-devel gtk3-devel libnotify-devel GConf2 nss.x86_64 libXScrnSaver alsa-lib + else + echo "Unsupported distribution." + exit 1 + fi +} + +cypress_run () { + local specs="$1" + local timeout="$2" + local override_config="excludeSpecPattern=*.po.ts,retries=0,specPattern=${specs}" + + if [ x"$timeout" != "x" ]; then + override_config="${override_config},defaultCommandTimeout=${timeout}" + fi + npx cypress run --browser chrome --headless --config "$override_config" +} + +install_common +install_chrome + +CYPRESS_BASE_URL=$(ceph mgr services | jq -r .dashboard) +export CYPRESS_BASE_URL + +cd $DASHBOARD_FRONTEND_DIR + +# This is required for Cypress to understand typescript +npm ci --unsafe-perm +npx cypress verify +npx cypress info + +# Take `orch device ls` and `orch ps` as ground truth. +ceph orch device ls --refresh +ceph orch ps --refresh +sleep 10 # the previous call is asynchronous +ceph orch device ls --format=json | tee cypress/fixtures/orchestrator/inventory.json +ceph orch ps --format=json | tee cypress/fixtures/orchestrator/services.json + +DASHBOARD_ADMIN_SECRET_FILE="/tmp/dashboard-admin-secret.txt" +printf 'admin' > "${DASHBOARD_ADMIN_SECRET_FILE}" +ceph dashboard ac-user-set-password admin -i "${DASHBOARD_ADMIN_SECRET_FILE}" --force-password + +# Run Dashboard e2e tests. +# These tests are designed with execution order in mind, since orchestrator operations +# are likely to change cluster state, we can't just run tests in arbitrarily order. +# See /ceph/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/ folder. +find cypress # List all specs + +cypress_run "cypress/e2e/orchestrator/01-hosts.e2e-spec.ts" + +# Hosts are removed and added in the previous step. Do a refresh again. 
+ceph orch device ls --refresh +sleep 10 +ceph orch device ls --format=json | tee cypress/fixtures/orchestrator/inventory.json + +cypress_run "cypress/e2e/orchestrator/03-inventory.e2e-spec.ts" +cypress_run "cypress/e2e/orchestrator/04-osds.e2e-spec.ts" 300000 diff --git a/qa/workunits/cephadm/test_iscsi_etc_hosts.sh b/qa/workunits/cephadm/test_iscsi_etc_hosts.sh new file mode 100755 index 000000000..adbc34a92 --- /dev/null +++ b/qa/workunits/cephadm/test_iscsi_etc_hosts.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# checks if the container and host's /etc/hosts files match +# Necessary to avoid potential bugs caused by podman making +# edits to /etc/hosts file in the container +# exits with code 1 if host and iscsi container /etc/hosts do no match + +set -ex + +ISCSI_DAEMON=$(sudo /home/ubuntu/cephtest/cephadm ls | jq -r '.[] | select(.service_name == "iscsi.foo") | .name') +sudo /home/ubuntu/cephtest/cephadm enter --name $ISCSI_DAEMON -- cat /etc/hosts > iscsi_daemon_etc_hosts.txt +if cmp --silent /etc/hosts iscsi_daemon_etc_hosts.txt; then + echo "Daemon and host /etc/hosts files successfully matched" +else + echo "ERROR: /etc/hosts on host did not match /etc/hosts in the iscsi container!" + echo "Host /etc/hosts:" + cat /etc/hosts + echo "Iscsi container /etc/hosts:" + cat iscsi_daemon_etc_hosts.txt + exit 1 +fi diff --git a/qa/workunits/cephadm/test_iscsi_pids_limit.sh b/qa/workunits/cephadm/test_iscsi_pids_limit.sh new file mode 100755 index 000000000..bed4cc9e2 --- /dev/null +++ b/qa/workunits/cephadm/test_iscsi_pids_limit.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# checks if the containers default pids-limit (4096) is removed and Iscsi +# containers continue to run +# exits 1 if fails + +set -ex + +ISCSI_CONT_IDS=$(sudo podman ps -qa --filter='name=iscsi') +CONT_COUNT=$(echo ${ISCSI_CONT_IDS} | wc -w) +test ${CONT_COUNT} -eq 2 + +for i in ${ISCSI_CONT_IDS} +do + test $(sudo podman exec ${i} cat /sys/fs/cgroup/pids/pids.max) == max +done + +for i in ${ISCSI_CONT_IDS} +do + sudo podman exec ${i} /bin/sh -c 'for j in {0..20000}; do sleep 300 & done' +done + +for i in ${ISCSI_CONT_IDS} +do + SLEEP_COUNT=$(sudo podman exec ${i} /bin/sh -c 'ps -ef | grep -c sleep') + test ${SLEEP_COUNT} -gt 20000 +done + +echo OK diff --git a/qa/workunits/cephadm/test_repos.sh b/qa/workunits/cephadm/test_repos.sh new file mode 100755 index 000000000..221585fd0 --- /dev/null +++ b/qa/workunits/cephadm/test_repos.sh @@ -0,0 +1,45 @@ +#!/bin/bash -ex + +SCRIPT_NAME=$(basename ${BASH_SOURCE[0]}) +SCRIPT_DIR=$(dirname ${BASH_SOURCE[0]}) +CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm + +[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) +trap "$SUDO rm -rf $TMPDIR" EXIT + +if [ -z "$CEPHADM" ]; then + CEPHADM=`mktemp -p $TMPDIR tmp.cephadm.XXXXXX` + ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM" +fi + +# this is a pretty weak test, unfortunately, since the +# package may also be in the base OS. 
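+# [editor's note, not part of the original script] The function below chains
+# the four supported package managers with ||: it tries apt, then yum, then
+# dnf, then zypper, and the chain succeeds as soon as one of them can install
+# the cephadm package, run `cephadm install`, and remove the package again.
+# A manager that is absent on the host simply fails its branch and falls
+# through to the next one.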
+function test_install_uninstall() { + ( sudo apt update && \ + sudo apt -y install cephadm && \ + sudo $CEPHADM install && \ + sudo apt -y remove cephadm ) || \ + ( sudo yum -y install cephadm && \ + sudo $CEPHADM install && \ + sudo yum -y remove cephadm ) || \ + ( sudo dnf -y install cephadm && \ + sudo $CEPHADM install && \ + sudo dnf -y remove cephadm ) || \ + ( sudo zypper -n install cephadm && \ + sudo $CEPHADM install && \ + sudo zypper -n remove cephadm ) +} + +sudo $CEPHADM -v add-repo --release octopus +test_install_uninstall +sudo $CEPHADM -v rm-repo + +sudo $CEPHADM -v add-repo --dev main +test_install_uninstall +sudo $CEPHADM -v rm-repo + +sudo $CEPHADM -v add-repo --release 15.2.7 +test_install_uninstall +sudo $CEPHADM -v rm-repo + +echo OK. diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh new file mode 100755 index 000000000..aecfd56a9 --- /dev/null +++ b/qa/workunits/cephtool/test.sh @@ -0,0 +1,2991 @@ +#!/usr/bin/env bash +# -*- mode:shell-script; tab-width:8; sh-basic-offset:2; indent-tabs-mode:t -*- +# vim: ts=8 sw=8 ft=bash smarttab +set -x + +source $(dirname $0)/../../standalone/ceph-helpers.sh + +set -e +set -o functrace +PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' +SUDO=${SUDO:-sudo} +export CEPH_DEV=1 + +function check_no_osd_down() +{ + ! ceph osd dump | grep ' down ' +} + +function wait_no_osd_down() +{ + max_run=300 + for i in $(seq 1 $max_run) ; do + if ! check_no_osd_down ; then + echo "waiting for osd(s) to come back up ($i/$max_run)" + sleep 1 + else + break + fi + done + check_no_osd_down +} + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function expect_true() +{ + set -x + if ! "$@"; then return 1; else return 0; fi +} + +TEMP_DIR=$(mktemp -d ${TMPDIR-/tmp}/cephtool.XXX) +trap "rm -fr $TEMP_DIR" 0 + +TMPFILE=$(mktemp $TEMP_DIR/test_invalid.XXX) + +# +# retry_eagain max cmd args ... +# +# retry cmd args ... if it exits on error and its output contains the +# string EAGAIN, at most $max times +# +function retry_eagain() +{ + local max=$1 + shift + local status + local tmpfile=$TEMP_DIR/retry_eagain.$$ + local count + for count in $(seq 1 $max) ; do + status=0 + "$@" > $tmpfile 2>&1 || status=$? + if test $status = 0 || + ! grep --quiet EAGAIN $tmpfile ; then + break + fi + sleep 1 + done + if test $count = $max ; then + echo retried with non zero exit status, $max times: "$@" >&2 + fi + cat $tmpfile + rm $tmpfile + return $status +} + +# +# map_enxio_to_eagain cmd arg ... +# +# add EAGAIN to the output of cmd arg ... if the output contains +# ENXIO. +# +function map_enxio_to_eagain() +{ + local status=0 + local tmpfile=$TEMP_DIR/map_enxio_to_eagain.$$ + + "$@" > $tmpfile 2>&1 || status=$? + if test $status != 0 && + grep --quiet ENXIO $tmpfile ; then + echo "EAGAIN added by $0::map_enxio_to_eagain" >> $tmpfile + fi + cat $tmpfile + rm $tmpfile + return $status +} + +function check_response() +{ + expected_string=$1 + retcode=$2 + expected_retcode=$3 + if [ "$expected_retcode" -a $retcode != $expected_retcode ] ; then + echo "return code invalid: got $retcode, expected $expected_retcode" >&2 + exit 1 + fi + + if ! grep --quiet -- "$expected_string" $TMPFILE ; then + echo "Didn't find $expected_string in output" >&2 + cat $TMPFILE >&2 + exit 1 + fi +} + +function get_config_value_or_die() +{ + local target config_opt raw val + + target=$1 + config_opt=$2 + + raw="`$SUDO ceph daemon $target config get $config_opt 2>/dev/null`" + if [[ $? 
-ne 0 ]]; then + echo "error obtaining config opt '$config_opt' from '$target': $raw" + exit 1 + fi + + raw=`echo $raw | sed -e 's/[{} "]//g'` + val=`echo $raw | cut -f2 -d:` + + echo "$val" + return 0 +} + +function expect_config_value() +{ + local target config_opt expected_val val + target=$1 + config_opt=$2 + expected_val=$3 + + val=$(get_config_value_or_die $target $config_opt) + + if [[ "$val" != "$expected_val" ]]; then + echo "expected '$expected_val', got '$val'" + exit 1 + fi +} + +function ceph_watch_start() +{ + local whatch_opt=--watch + + if [ -n "$1" ]; then + whatch_opt=--watch-$1 + if [ -n "$2" ]; then + whatch_opt+=" --watch-channel $2" + fi + fi + + CEPH_WATCH_FILE=${TEMP_DIR}/CEPH_WATCH_$$ + ceph $whatch_opt > $CEPH_WATCH_FILE & + CEPH_WATCH_PID=$! + + # wait until the "ceph" client is connected and receiving + # log messages from monitor + for i in `seq 3`; do + grep -q "cluster" $CEPH_WATCH_FILE && break + sleep 1 + done +} + +function ceph_watch_wait() +{ + local regexp=$1 + local timeout=30 + + if [ -n "$2" ]; then + timeout=$2 + fi + + for i in `seq ${timeout}`; do + grep -q "$regexp" $CEPH_WATCH_FILE && break + sleep 1 + done + + kill $CEPH_WATCH_PID + + if ! grep "$regexp" $CEPH_WATCH_FILE; then + echo "pattern ${regexp} not found in watch file. Full watch file content:" >&2 + cat $CEPH_WATCH_FILE >&2 + return 1 + fi +} + +function test_mon_injectargs() +{ + ceph tell osd.0 injectargs --no-osd_enable_op_tracker + ceph tell osd.0 config get osd_enable_op_tracker | grep false + ceph tell osd.0 injectargs '--osd_enable_op_tracker --osd_op_history_duration 500' + ceph tell osd.0 config get osd_enable_op_tracker | grep true + ceph tell osd.0 config get osd_op_history_duration | grep 500 + ceph tell osd.0 injectargs --no-osd_enable_op_tracker + ceph tell osd.0 config get osd_enable_op_tracker | grep false + ceph tell osd.0 injectargs -- --osd_enable_op_tracker + ceph tell osd.0 config get osd_enable_op_tracker | grep true + ceph tell osd.0 injectargs -- '--osd_enable_op_tracker --osd_op_history_duration 600' + ceph tell osd.0 config get osd_enable_op_tracker | grep true + ceph tell osd.0 config get osd_op_history_duration | grep 600 + + ceph tell osd.0 injectargs -- '--osd_deep_scrub_interval 2419200' + ceph tell osd.0 config get osd_deep_scrub_interval | grep 2419200 + + ceph tell osd.0 injectargs -- '--mon_probe_timeout 2' + ceph tell osd.0 config get mon_probe_timeout | grep 2 + + ceph tell osd.0 injectargs -- '--mon-lease 6' + ceph tell osd.0 config get mon_lease | grep 6 + + # osd-scrub-auto-repair-num-errors is an OPT_U32, so -1 is not a valid setting + expect_false ceph tell osd.0 injectargs --osd-scrub-auto-repair-num-errors -1 2> $TMPFILE || return 1 + check_response "Error EINVAL: Parse error setting osd_scrub_auto_repair_num_errors to '-1' using injectargs" + + expect_failure $TEMP_DIR "Option --osd_op_history_duration requires an argument" \ + ceph tell osd.0 injectargs -- '--osd_op_history_duration' + +} + +function test_mon_injectargs_SI() +{ + # Test SI units during injectargs and 'config set' + # We only aim at testing the units are parsed accordingly + # and don't intend to test whether the options being set + # actually expect SI units to be passed. + # Keep in mind that all integer based options that are not based on bytes + # (i.e., INT, LONG, U32, U64) will accept SI unit modifiers and be parsed to + # base 10. 
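+  # [editor's note, not part of the original script] Worked example of the
+  # SI parsing asserted below: for a plain integer option, 10K is parsed as
+  # 10 * 10^3 = 10000 and 1G as 10^9 = 1000000000, while "10F" is rejected
+  # with EINVAL (22) because F is not a recognized unit suffix.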
+ initial_value=$(get_config_value_or_die "mon.a" "mon_pg_warn_min_objects") + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10 + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10 + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10K + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10000 + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 1G + expect_config_value "mon.a" "mon_pg_warn_min_objects" 1000000000 + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10F > $TMPFILE || true + check_response "(22) Invalid argument" + # now test with injectargs + ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10' + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10 + ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10K' + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10000 + ceph tell mon.a injectargs '--mon_pg_warn_min_objects 1G' + expect_config_value "mon.a" "mon_pg_warn_min_objects" 1000000000 + expect_false ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10F' + expect_false ceph tell mon.a injectargs '--mon_globalid_prealloc -1' + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects $initial_value +} + +function test_mon_injectargs_IEC() +{ + # Test IEC units during injectargs and 'config set' + # We only aim at testing the units are parsed accordingly + # and don't intend to test whether the options being set + # actually expect IEC units to be passed. + # Keep in mind that all integer based options that are based on bytes + # (i.e., INT, LONG, U32, U64) will accept IEC unit modifiers, as well as SI + # unit modifiers (for backwards compatibility and convenience) and be parsed + # to base 2. + initial_value=$(get_config_value_or_die "mon.a" "mon_data_size_warn") + $SUDO ceph daemon mon.a config set mon_data_size_warn 15000000000 + expect_config_value "mon.a" "mon_data_size_warn" 15000000000 + $SUDO ceph daemon mon.a config set mon_data_size_warn 15G + expect_config_value "mon.a" "mon_data_size_warn" 16106127360 + $SUDO ceph daemon mon.a config set mon_data_size_warn 16Gi + expect_config_value "mon.a" "mon_data_size_warn" 17179869184 + $SUDO ceph daemon mon.a config set mon_data_size_warn 10F > $TMPFILE || true + check_response "(22) Invalid argument" + # now test with injectargs + ceph tell mon.a injectargs '--mon_data_size_warn 15000000000' + expect_config_value "mon.a" "mon_data_size_warn" 15000000000 + ceph tell mon.a injectargs '--mon_data_size_warn 15G' + expect_config_value "mon.a" "mon_data_size_warn" 16106127360 + ceph tell mon.a injectargs '--mon_data_size_warn 16Gi' + expect_config_value "mon.a" "mon_data_size_warn" 17179869184 + expect_false ceph tell mon.a injectargs '--mon_data_size_warn 10F' + $SUDO ceph daemon mon.a config set mon_data_size_warn $initial_value +} + +function test_tiering_agent() +{ + local slow=slow_eviction + local fast=fast_eviction + ceph osd pool create $slow 1 1 + ceph osd pool application enable $slow rados + ceph osd pool create $fast 1 1 + ceph osd tier add $slow $fast + ceph osd tier cache-mode $fast writeback + ceph osd tier set-overlay $slow $fast + ceph osd pool set $fast hit_set_type bloom + rados -p $slow put obj1 /etc/group + ceph osd pool set $fast target_max_objects 1 + ceph osd pool set $fast hit_set_count 1 + ceph osd pool set $fast hit_set_period 5 + # wait for the object to be evicted from the cache + local evicted + evicted=false + for i in `seq 1 300` ; do + if ! 
rados -p $fast ls | grep obj1 ; then + evicted=true + break + fi + sleep 1 + done + $evicted # assert + # the object is proxy read and promoted to the cache + rados -p $slow get obj1 - >/dev/null + # wait for the promoted object to be evicted again + evicted=false + for i in `seq 1 300` ; do + if ! rados -p $fast ls | grep obj1 ; then + evicted=true + break + fi + sleep 1 + done + $evicted # assert + ceph osd tier remove-overlay $slow + ceph osd tier remove $slow $fast + ceph osd pool delete $fast $fast --yes-i-really-really-mean-it + ceph osd pool delete $slow $slow --yes-i-really-really-mean-it +} + +function test_tiering_1() +{ + # tiering + ceph osd pool create slow 2 + ceph osd pool application enable slow rados + ceph osd pool create slow2 2 + ceph osd pool application enable slow2 rados + ceph osd pool create cache 2 + ceph osd pool create cache2 2 + ceph osd tier add slow cache + ceph osd tier add slow cache2 + expect_false ceph osd tier add slow2 cache + # application metadata should propagate to the tiers + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "slow") | .application_metadata["rados"]' | grep '{}' + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "slow2") | .application_metadata["rados"]' | grep '{}' + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "cache") | .application_metadata["rados"]' | grep '{}' + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "cache2") | .application_metadata["rados"]' | grep '{}' + # forward is removed/deprecated + expect_false ceph osd tier cache-mode cache forward + expect_false ceph osd tier cache-mode cache forward --yes-i-really-mean-it + # test some state transitions + ceph osd tier cache-mode cache writeback + expect_false ceph osd tier cache-mode cache readonly + expect_false ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + ceph osd tier cache-mode cache proxy + ceph osd tier cache-mode cache readproxy + ceph osd tier cache-mode cache none + ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + ceph osd tier cache-mode cache none + ceph osd tier cache-mode cache writeback + ceph osd tier cache-mode cache proxy + ceph osd tier cache-mode cache writeback + expect_false ceph osd tier cache-mode cache none + expect_false ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + # test with dirty objects in the tier pool + # tier pool currently set to 'writeback' + rados -p cache put /etc/passwd /etc/passwd + flush_pg_stats + # 1 dirty object in pool 'cache' + ceph osd tier cache-mode cache proxy + expect_false ceph osd tier cache-mode cache none + expect_false ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + ceph osd tier cache-mode cache writeback + # remove object from tier pool + rados -p cache rm /etc/passwd + rados -p cache cache-flush-evict-all + flush_pg_stats + # no dirty objects in pool 'cache' + ceph osd tier cache-mode cache proxy + ceph osd tier cache-mode cache none + ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + TRIES=0 + while ! 
ceph osd pool set cache pg_num 3 --yes-i-really-mean-it 2>$TMPFILE + do + grep 'currently creating pgs' $TMPFILE + TRIES=$(( $TRIES + 1 )) + test $TRIES -ne 60 + sleep 3 + done + expect_false ceph osd pool set cache pg_num 4 + ceph osd tier cache-mode cache none + ceph osd tier set-overlay slow cache + expect_false ceph osd tier set-overlay slow cache2 + expect_false ceph osd tier remove slow cache + ceph osd tier remove-overlay slow + ceph osd tier set-overlay slow cache2 + ceph osd tier remove-overlay slow + ceph osd tier remove slow cache + ceph osd tier add slow2 cache + expect_false ceph osd tier set-overlay slow cache + ceph osd tier set-overlay slow2 cache + ceph osd tier remove-overlay slow2 + ceph osd tier remove slow2 cache + ceph osd tier remove slow cache2 + + # make sure a non-empty pool fails + rados -p cache2 put /etc/passwd /etc/passwd + while ! ceph df | grep cache2 | grep ' 1 ' ; do + echo waiting for pg stats to flush + sleep 2 + done + expect_false ceph osd tier add slow cache2 + ceph osd tier add slow cache2 --force-nonempty + ceph osd tier remove slow cache2 + + ceph osd pool ls | grep cache2 + ceph osd pool ls -f json-pretty | grep cache2 + ceph osd pool ls detail | grep cache2 + ceph osd pool ls detail -f json-pretty | grep cache2 + + ceph osd pool delete slow slow --yes-i-really-really-mean-it + ceph osd pool delete slow2 slow2 --yes-i-really-really-mean-it + ceph osd pool delete cache cache --yes-i-really-really-mean-it + ceph osd pool delete cache2 cache2 --yes-i-really-really-mean-it +} + +function test_tiering_2() +{ + # make sure we can't clobber snapshot state + ceph osd pool create snap_base 2 + ceph osd pool application enable snap_base rados + ceph osd pool create snap_cache 2 + ceph osd pool mksnap snap_cache snapname + expect_false ceph osd tier add snap_base snap_cache + ceph osd pool delete snap_base snap_base --yes-i-really-really-mean-it + ceph osd pool delete snap_cache snap_cache --yes-i-really-really-mean-it +} + +function test_tiering_3() +{ + # make sure we can't create snapshot on tier + ceph osd pool create basex 2 + ceph osd pool application enable basex rados + ceph osd pool create cachex 2 + ceph osd tier add basex cachex + expect_false ceph osd pool mksnap cache snapname + ceph osd tier remove basex cachex + ceph osd pool delete basex basex --yes-i-really-really-mean-it + ceph osd pool delete cachex cachex --yes-i-really-really-mean-it +} + +function test_tiering_4() +{ + # make sure we can't create an ec pool tier + ceph osd pool create eccache 2 2 erasure + expect_false ceph osd set-require-min-compat-client bobtail + ceph osd pool create repbase 2 + ceph osd pool application enable repbase rados + expect_false ceph osd tier add repbase eccache + ceph osd pool delete repbase repbase --yes-i-really-really-mean-it + ceph osd pool delete eccache eccache --yes-i-really-really-mean-it +} + +function test_tiering_5() +{ + # convenient add-cache command + ceph osd pool create slow 2 + ceph osd pool application enable slow rados + ceph osd pool create cache3 2 + ceph osd tier add-cache slow cache3 1024000 + ceph osd dump | grep cache3 | grep bloom | grep 'false_positive_probability: 0.05' | grep 'target_bytes 1024000' | grep '1200s x4' + ceph osd tier remove slow cache3 2> $TMPFILE || true + check_response "EBUSY: tier pool 'cache3' is the overlay for 'slow'; please remove-overlay first" + ceph osd tier remove-overlay slow + ceph osd tier remove slow cache3 + ceph osd pool ls | grep cache3 + ceph osd pool delete cache3 cache3 
--yes-i-really-really-mean-it + ! ceph osd pool ls | grep cache3 || exit 1 + ceph osd pool delete slow slow --yes-i-really-really-mean-it +} + +function test_tiering_6() +{ + # check add-cache whether work + ceph osd pool create datapool 2 + ceph osd pool application enable datapool rados + ceph osd pool create cachepool 2 + ceph osd tier add-cache datapool cachepool 1024000 + ceph osd tier cache-mode cachepool writeback + rados -p datapool put object /etc/passwd + rados -p cachepool stat object + rados -p cachepool cache-flush object + rados -p datapool stat object + ceph osd tier remove-overlay datapool + ceph osd tier remove datapool cachepool + ceph osd pool delete cachepool cachepool --yes-i-really-really-mean-it + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it +} + +function test_tiering_7() +{ + # protection against pool removal when used as tiers + ceph osd pool create datapool 2 + ceph osd pool application enable datapool rados + ceph osd pool create cachepool 2 + ceph osd tier add-cache datapool cachepool 1024000 + ceph osd pool delete cachepool cachepool --yes-i-really-really-mean-it 2> $TMPFILE || true + check_response "EBUSY: pool 'cachepool' is a tier of 'datapool'" + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it 2> $TMPFILE || true + check_response "EBUSY: pool 'datapool' has tiers cachepool" + ceph osd tier remove-overlay datapool + ceph osd tier remove datapool cachepool + ceph osd pool delete cachepool cachepool --yes-i-really-really-mean-it + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it +} + +function test_tiering_8() +{ + ## check health check + ceph osd set notieragent + ceph osd pool create datapool 2 + ceph osd pool application enable datapool rados + ceph osd pool create cache4 2 + ceph osd tier add-cache datapool cache4 1024000 + ceph osd tier cache-mode cache4 writeback + tmpfile=$(mktemp|grep tmp) + dd if=/dev/zero of=$tmpfile bs=4K count=1 + ceph osd pool set cache4 target_max_objects 200 + ceph osd pool set cache4 target_max_bytes 1000000 + rados -p cache4 put foo1 $tmpfile + rados -p cache4 put foo2 $tmpfile + rm -f $tmpfile + flush_pg_stats + ceph df | grep datapool | grep ' 2 ' + ceph osd tier remove-overlay datapool + ceph osd tier remove datapool cache4 + ceph osd pool delete cache4 cache4 --yes-i-really-really-mean-it + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it + ceph osd unset notieragent +} + +function test_tiering_9() +{ + # make sure 'tier remove' behaves as we expect + # i.e., removing a tier from a pool that's not its base pool only + # results in a 'pool foo is now (or already was) not a tier of bar' + # + ceph osd pool create basepoolA 2 + ceph osd pool application enable basepoolA rados + ceph osd pool create basepoolB 2 + ceph osd pool application enable basepoolB rados + poolA_id=$(ceph osd dump | grep 'pool.*basepoolA' | awk '{print $2;}') + poolB_id=$(ceph osd dump | grep 'pool.*basepoolB' | awk '{print $2;}') + + ceph osd pool create cache5 2 + ceph osd pool create cache6 2 + ceph osd tier add basepoolA cache5 + ceph osd tier add basepoolB cache6 + ceph osd tier remove basepoolB cache5 2>&1 | grep 'not a tier of' + ceph osd dump | grep "pool.*'cache5'" 2>&1 | grep "tier_of[ \t]\+$poolA_id" + ceph osd tier remove basepoolA cache6 2>&1 | grep 'not a tier of' + ceph osd dump | grep "pool.*'cache6'" 2>&1 | grep "tier_of[ \t]\+$poolB_id" + + ceph osd tier remove basepoolA cache5 2>&1 | grep 'not a tier of' + ! 
ceph osd dump | grep "pool.*'cache5'" 2>&1 | grep "tier_of" || exit 1 + ceph osd tier remove basepoolB cache6 2>&1 | grep 'not a tier of' + ! ceph osd dump | grep "pool.*'cache6'" 2>&1 | grep "tier_of" || exit 1 + + ! ceph osd dump | grep "pool.*'basepoolA'" 2>&1 | grep "tiers" || exit 1 + ! ceph osd dump | grep "pool.*'basepoolB'" 2>&1 | grep "tiers" || exit 1 + + ceph osd pool delete cache6 cache6 --yes-i-really-really-mean-it + ceph osd pool delete cache5 cache5 --yes-i-really-really-mean-it + ceph osd pool delete basepoolB basepoolB --yes-i-really-really-mean-it + ceph osd pool delete basepoolA basepoolA --yes-i-really-really-mean-it +} + +function test_auth() +{ + expect_false ceph auth add client.xx mon 'invalid' osd "allow *" + expect_false ceph auth add client.xx mon 'allow *' osd "allow *" invalid "allow *" + ceph auth add client.xx mon 'allow *' osd "allow *" + ceph auth export client.xx >client.xx.keyring + ceph auth add client.xx -i client.xx.keyring + rm -f client.xx.keyring + ceph auth list | grep client.xx + ceph auth ls | grep client.xx + ceph auth get client.xx | grep caps | grep mon + ceph auth get client.xx | grep caps | grep osd + ceph auth get-key client.xx + ceph auth print-key client.xx + ceph auth print_key client.xx + ceph auth caps client.xx osd "allow rw" + expect_false sh <<< "ceph auth get client.xx | grep caps | grep mon" + ceph auth get client.xx | grep osd | grep "allow rw" + ceph auth caps client.xx mon 'allow command "osd tree"' + ceph auth export | grep client.xx + ceph auth export -o authfile + ceph auth import -i authfile + + ceph auth export -o authfile2 + diff authfile authfile2 + rm authfile authfile2 + ceph auth del client.xx + expect_false ceph auth get client.xx + + # (almost) interactive mode + echo -e 'auth add client.xx mon "allow *" osd "allow *"\n' | ceph + ceph auth get client.xx + # script mode + echo 'auth del client.xx' | ceph + expect_false ceph auth get client.xx +} + +function test_auth_profiles() +{ + ceph auth add client.xx-profile-ro mon 'allow profile read-only' \ + mgr 'allow profile read-only' + ceph auth add client.xx-profile-rw mon 'allow profile read-write' \ + mgr 'allow profile read-write' + ceph auth add client.xx-profile-rd mon 'allow profile role-definer' + + ceph auth export > client.xx.keyring + + # read-only is allowed all read-only commands (auth excluded) + ceph -n client.xx-profile-ro -k client.xx.keyring status + ceph -n client.xx-profile-ro -k client.xx.keyring osd dump + ceph -n client.xx-profile-ro -k client.xx.keyring pg dump + ceph -n client.xx-profile-ro -k client.xx.keyring mon dump + # read-only gets access denied for rw commands or auth commands + ceph -n client.xx-profile-ro -k client.xx.keyring log foo >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-ro -k client.xx.keyring osd set noout >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-ro -k client.xx.keyring auth ls >& $TMPFILE || true + check_response "EACCES: access denied" + + # read-write is allowed for all read-write commands (except auth) + ceph -n client.xx-profile-rw -k client.xx.keyring status + ceph -n client.xx-profile-rw -k client.xx.keyring osd dump + ceph -n client.xx-profile-rw -k client.xx.keyring pg dump + ceph -n client.xx-profile-rw -k client.xx.keyring mon dump + ceph -n client.xx-profile-rw -k client.xx.keyring fs dump + ceph -n client.xx-profile-rw -k client.xx.keyring log foo + ceph -n client.xx-profile-rw -k client.xx.keyring osd set noout + ceph -n 
client.xx-profile-rw -k client.xx.keyring osd unset noout + # read-write gets access denied for auth commands + ceph -n client.xx-profile-rw -k client.xx.keyring auth ls >& $TMPFILE || true + check_response "EACCES: access denied" + + # role-definer is allowed RWX 'auth' commands and read-only 'mon' commands + ceph -n client.xx-profile-rd -k client.xx.keyring auth ls + ceph -n client.xx-profile-rd -k client.xx.keyring auth export + ceph -n client.xx-profile-rd -k client.xx.keyring auth add client.xx-profile-foo + ceph -n client.xx-profile-rd -k client.xx.keyring status + ceph -n client.xx-profile-rd -k client.xx.keyring osd dump >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring pg dump >& $TMPFILE || true + check_response "EACCES: access denied" + # read-only 'mon' subsystem commands are allowed + ceph -n client.xx-profile-rd -k client.xx.keyring mon dump + # but read-write 'mon' commands are not + ceph -n client.xx-profile-rd -k client.xx.keyring mon add foo 1.1.1.1 >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring fs dump >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring log foo >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring osd set noout >& $TMPFILE || true + check_response "EACCES: access denied" + + ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-ro + ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-rw + + # add a new role-definer with the existing role-definer + ceph -n client.xx-profile-rd -k client.xx.keyring \ + auth add client.xx-profile-rd2 mon 'allow profile role-definer' + ceph -n client.xx-profile-rd -k client.xx.keyring \ + auth export > client.xx.keyring.2 + # remove old role-definer using the new role-definer + ceph -n client.xx-profile-rd2 -k client.xx.keyring.2 \ + auth del client.xx-profile-rd + # remove the remaining role-definer with admin + ceph auth del client.xx-profile-rd2 + rm -f client.xx.keyring client.xx.keyring.2 +} + +function test_mon_caps() +{ + ceph-authtool --create-keyring $TEMP_DIR/ceph.client.bug.keyring + chmod +r $TEMP_DIR/ceph.client.bug.keyring + ceph-authtool $TEMP_DIR/ceph.client.bug.keyring -n client.bug --gen-key + ceph auth add client.bug -i $TEMP_DIR/ceph.client.bug.keyring + + # pass --no-mon-config since we are looking for the permission denied error + rados lspools --no-mon-config --keyring $TEMP_DIR/ceph.client.bug.keyring -n client.bug >& $TMPFILE || true + cat $TMPFILE + check_response "Permission denied" + + rm -rf $TEMP_DIR/ceph.client.bug.keyring + ceph auth del client.bug + ceph-authtool --create-keyring $TEMP_DIR/ceph.client.bug.keyring + chmod +r $TEMP_DIR/ceph.client.bug.keyring + ceph-authtool $TEMP_DIR/ceph.client.bug.keyring -n client.bug --gen-key + ceph-authtool -n client.bug --cap mon '' $TEMP_DIR/ceph.client.bug.keyring + ceph auth add client.bug -i $TEMP_DIR/ceph.client.bug.keyring + rados lspools --no-mon-config --keyring $TEMP_DIR/ceph.client.bug.keyring -n client.bug >& $TMPFILE || true + check_response "Permission denied" +} + +function test_mon_misc() +{ + # with and without verbosity + ceph osd dump | grep '^epoch' + ceph --concise osd dump | grep '^epoch' + + ceph osd df | grep 'MIN/MAX VAR' + + # df + ceph df > $TMPFILE + grep RAW $TMPFILE + grep -v DIRTY $TMPFILE + ceph df detail > $TMPFILE + grep DIRTY 
$TMPFILE
+ ceph df --format json > $TMPFILE
+ grep 'total_bytes' $TMPFILE
+ grep -v 'dirty' $TMPFILE
+ ceph df detail --format json > $TMPFILE
+ grep 'rd_bytes' $TMPFILE
+ grep 'dirty' $TMPFILE
+ ceph df --format xml | grep '<total_bytes>'
+ ceph df detail --format xml | grep '<rd_bytes>'
+
+ ceph fsid
+ ceph health
+ ceph health detail
+ ceph health --format json-pretty
+ ceph health detail --format xml-pretty
+
+ ceph time-sync-status
+
+ ceph node ls
+ for t in mon osd mds mgr ; do
+ ceph node ls $t
+ done
+
+ ceph_watch_start
+ mymsg="this is a test log message $$.$(date)"
+ ceph log "$mymsg"
+ ceph log last | grep "$mymsg"
+ ceph log last 100 | grep "$mymsg"
+ ceph_watch_wait "$mymsg"
+
+ ceph mgr stat
+ ceph mgr dump
+ ceph mgr dump | jq -e '.active_clients[0].name'
+ ceph mgr module ls
+ ceph mgr module enable restful
+ expect_false ceph mgr module enable foodne
+ ceph mgr module enable foodne --force
+ ceph mgr module disable foodne
+ ceph mgr module disable foodnebizbangbash
+
+ ceph mon metadata a
+ ceph mon metadata
+ ceph mon count-metadata ceph_version
+ ceph mon versions
+
+ ceph mgr metadata
+ ceph mgr versions
+ ceph mgr count-metadata ceph_version
+
+ ceph versions
+
+ ceph node ls
+}
+
+function check_mds_active()
+{
+ fs_name=$1
+ ceph fs get $fs_name | grep active
+}
+
+function wait_mds_active()
+{
+ fs_name=$1
+ max_run=300
+ for i in $(seq 1 $max_run) ; do
+ if ! check_mds_active $fs_name ; then
+ echo "waiting for an active MDS daemon ($i/$max_run)"
+ sleep 5
+ else
+ break
+ fi
+ done
+ check_mds_active $fs_name
+}
+
+function get_mds_gids()
+{
+ fs_name=$1
+ ceph fs get $fs_name --format=json | python3 -c "import json; import sys; print(' '.join([m['gid'].__str__() for m in json.load(sys.stdin)['mdsmap']['info'].values()]))"
+}
+
+function fail_all_mds()
+{
+ fs_name=$1
+ ceph fs set $fs_name cluster_down true
+ mds_gids=$(get_mds_gids $fs_name)
+ for mds_gid in $mds_gids ; do
+ ceph mds fail $mds_gid
+ done
+ if check_mds_active $fs_name ; then
+ echo "An active MDS remains, something went wrong"
+ ceph fs get $fs_name
+ exit -1
+ fi
+
+}
+
+function remove_all_fs()
+{
+ existing_fs=$(ceph fs ls --format=json | python3 -c "import json; import sys; print(' '.join([fs['name'] for fs in json.load(sys.stdin)]))")
+ for fs_name in $existing_fs ; do
+ echo "Removing fs ${fs_name}..."
+ fail_all_mds $fs_name
+ echo "Removing existing filesystem '${fs_name}'..."
+ ceph fs rm $fs_name --yes-i-really-mean-it
+ echo "Removed '${fs_name}'."
+ done
+}
+
+# So that tests requiring MDS can skip if one is not configured
+# in the cluster at all
+function mds_exists()
+{
+ ceph auth ls | grep "^mds"
+}
+
+# Some of the commands are not idempotent; run them with
+# CEPH_CLI_TEST_DUP_COMMAND temporarily unset.
+function without_test_dup_command()
+{
+ if [ -z ${CEPH_CLI_TEST_DUP_COMMAND+x} ]; then
+ $@
+ else
+ local saved=${CEPH_CLI_TEST_DUP_COMMAND}
+ unset CEPH_CLI_TEST_DUP_COMMAND
+ $@
+ CEPH_CLI_TEST_DUP_COMMAND=$saved
+ fi
+}
+
+function test_mds_tell()
+{
+ local FS_NAME=cephfs
+ if !
mds_exists ; then
+ echo "Skipping test, no MDS found"
+ return
+ fi
+
+ remove_all_fs
+ ceph osd pool create fs_data 16
+ ceph osd pool create fs_metadata 16
+ ceph fs new $FS_NAME fs_metadata fs_data
+ wait_mds_active $FS_NAME
+
+ # Test injectargs by GID
+ old_mds_gids=$(get_mds_gids $FS_NAME)
+ echo Old GIDs: $old_mds_gids
+
+ for mds_gid in $old_mds_gids ; do
+ ceph tell mds.$mds_gid injectargs "--debug-mds 20"
+ done
+ expect_false ceph tell mds.a injectargs mds_max_file_recover -1
+
+ # Test respawn by rank
+ without_test_dup_command ceph tell mds.0 respawn
+ new_mds_gids=$old_mds_gids
+ while [ $new_mds_gids -eq $old_mds_gids ] ; do
+ sleep 5
+ new_mds_gids=$(get_mds_gids $FS_NAME)
+ done
+ echo New GIDs: $new_mds_gids
+
+ # Test respawn by ID
+ without_test_dup_command ceph tell mds.a respawn
+ new_mds_gids=$old_mds_gids
+ while [ $new_mds_gids -eq $old_mds_gids ] ; do
+ sleep 5
+ new_mds_gids=$(get_mds_gids $FS_NAME)
+ done
+ echo New GIDs: $new_mds_gids
+
+ remove_all_fs
+ ceph osd pool delete fs_data fs_data --yes-i-really-really-mean-it
+ ceph osd pool delete fs_metadata fs_metadata --yes-i-really-really-mean-it
+}
+
+function test_mon_mds()
+{
+ local FS_NAME=cephfs
+ remove_all_fs
+
+ ceph osd pool create fs_data 16
+ ceph osd pool create fs_metadata 16
+ ceph fs new $FS_NAME fs_metadata fs_data
+
+ ceph fs set $FS_NAME cluster_down true
+ ceph fs set $FS_NAME cluster_down false
+
+ ceph mds compat rm_incompat 4
+ ceph mds compat rm_incompat 4
+
+ # We don't want any MDSs to be up; their activity can interfere with
+ # the "current_epoch + 1" checking below if they're generating updates
+ fail_all_mds $FS_NAME
+
+ ceph mds compat show
+ ceph fs dump
+ ceph fs get $FS_NAME
+ for mds_gid in $(get_mds_gids $FS_NAME) ; do
+ ceph mds metadata $mds_gid
+ done
+ ceph mds metadata
+ ceph mds versions
+ ceph mds count-metadata os
+
+ # XXX mds fail, but how do you undo it?
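+ # (Hedged note: there seems to be no explicit "un-fail" command; once a rank
+ # is marked failed, a standby, or the respawned daemon, is expected to take
+ # the rank over again when the filesystem is allowed to come back up.)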
+ mdsmapfile=$TEMP_DIR/mdsmap.$$ + current_epoch=$(ceph fs dump -o $mdsmapfile --no-log-to-stderr 2>&1 | grep epoch | sed 's/.*epoch //') + [ -s $mdsmapfile ] + rm $mdsmapfile + + ceph osd pool create data2 16 + ceph osd pool create data3 16 + data2_pool=$(ceph osd dump | grep "pool.*'data2'" | awk '{print $2;}') + data3_pool=$(ceph osd dump | grep "pool.*'data3'" | awk '{print $2;}') + ceph fs add_data_pool cephfs $data2_pool + ceph fs add_data_pool cephfs $data3_pool + ceph fs add_data_pool cephfs 100 >& $TMPFILE || true + check_response "Error ENOENT" + ceph fs add_data_pool cephfs foobarbaz >& $TMPFILE || true + check_response "Error ENOENT" + ceph fs rm_data_pool cephfs $data2_pool + ceph fs rm_data_pool cephfs $data3_pool + ceph osd pool delete data2 data2 --yes-i-really-really-mean-it + ceph osd pool delete data3 data3 --yes-i-really-really-mean-it + ceph fs set cephfs max_mds 4 + ceph fs set cephfs max_mds 3 + ceph fs set cephfs max_mds 256 + expect_false ceph fs set cephfs max_mds 257 + ceph fs set cephfs max_mds 4 + ceph fs set cephfs max_mds 256 + expect_false ceph fs set cephfs max_mds 257 + expect_false ceph fs set cephfs max_mds asdf + expect_false ceph fs set cephfs inline_data true + ceph fs set cephfs inline_data true --yes-i-really-really-mean-it + ceph fs set cephfs inline_data yes --yes-i-really-really-mean-it + ceph fs set cephfs inline_data 1 --yes-i-really-really-mean-it + expect_false ceph fs set cephfs inline_data --yes-i-really-really-mean-it + ceph fs set cephfs inline_data false + ceph fs set cephfs inline_data no + ceph fs set cephfs inline_data 0 + expect_false ceph fs set cephfs inline_data asdf + ceph fs set cephfs max_file_size 1048576 + expect_false ceph fs set cephfs max_file_size 123asdf + + expect_false ceph fs set cephfs allow_new_snaps + ceph fs set cephfs allow_new_snaps true + ceph fs set cephfs allow_new_snaps 0 + ceph fs set cephfs allow_new_snaps false + ceph fs set cephfs allow_new_snaps no + expect_false ceph fs set cephfs allow_new_snaps taco + + # we should never be able to add EC pools as data or metadata pools + # create an ec-pool... + ceph osd pool create mds-ec-pool 16 16 erasure + set +e + ceph fs add_data_pool cephfs mds-ec-pool 2>$TMPFILE + check_response 'erasure-code' $? 
22 + set -e + ec_poolnum=$(ceph osd dump | grep "pool.* 'mds-ec-pool" | awk '{print $2;}') + data_poolnum=$(ceph osd dump | grep "pool.* 'fs_data" | awk '{print $2;}') + metadata_poolnum=$(ceph osd dump | grep "pool.* 'fs_metadata" | awk '{print $2;}') + + fail_all_mds $FS_NAME + + set +e + # Check that rmfailed requires confirmation + expect_false ceph mds rmfailed 0 + ceph mds rmfailed 0 --yes-i-really-mean-it + set -e + + # Check that `fs new` is no longer permitted + expect_false ceph fs new cephfs $metadata_poolnum $data_poolnum --yes-i-really-mean-it 2>$TMPFILE + + # Check that 'fs reset' runs + ceph fs reset $FS_NAME --yes-i-really-mean-it + + # Check that creating a second FS fails by default + ceph osd pool create fs_metadata2 16 + ceph osd pool create fs_data2 16 + set +e + expect_false ceph fs new cephfs2 fs_metadata2 fs_data2 + set -e + + # Check that setting enable_multiple enables creation of second fs + ceph fs flag set enable_multiple true --yes-i-really-mean-it + ceph fs new cephfs2 fs_metadata2 fs_data2 + + # Clean up multi-fs stuff + fail_all_mds cephfs2 + ceph fs rm cephfs2 --yes-i-really-mean-it + ceph osd pool delete fs_metadata2 fs_metadata2 --yes-i-really-really-mean-it + ceph osd pool delete fs_data2 fs_data2 --yes-i-really-really-mean-it + + fail_all_mds $FS_NAME + + # Clean up to enable subsequent fs new tests + ceph fs rm $FS_NAME --yes-i-really-mean-it + + set +e + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force 2>$TMPFILE + check_response 'erasure-code' $? 22 + ceph fs new $FS_NAME mds-ec-pool fs_data 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-ec-pool fs_data --force 2>$TMPFILE + check_response 'erasure-code' $? 22 + ceph fs new $FS_NAME mds-ec-pool mds-ec-pool 2>$TMPFILE + check_response 'erasure-code' $? 22 + set -e + + # ... new create a cache tier in front of the EC pool... + ceph osd pool create mds-tier 2 + ceph osd tier add mds-ec-pool mds-tier + ceph osd tier set-overlay mds-ec-pool mds-tier + tier_poolnum=$(ceph osd dump | grep "pool.* 'mds-tier" | awk '{print $2;}') + + # Use of a readonly tier should be forbidden + ceph osd tier cache-mode mds-tier readonly --yes-i-really-mean-it + set +e + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force 2>$TMPFILE + check_response 'has a write tier (mds-tier) that is configured to forward' $? 22 + set -e + + # Use of a writeback tier should enable FS creation + ceph osd tier cache-mode mds-tier writeback + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force + + # While a FS exists using the tiered pools, I should not be allowed + # to remove the tier + set +e + ceph osd tier remove-overlay mds-ec-pool 2>$TMPFILE + check_response 'in use by CephFS' $? 16 + ceph osd tier remove mds-ec-pool mds-tier 2>$TMPFILE + check_response 'in use by CephFS' $? 16 + set -e + + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + # ... but we should be forbidden from using the cache pool in the FS directly. + set +e + ceph fs new $FS_NAME fs_metadata mds-tier --force 2>$TMPFILE + check_response 'in use as a cache tier' $? 22 + ceph fs new $FS_NAME mds-tier fs_data 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-tier fs_data --force 2>$TMPFILE + check_response 'in use as a cache tier' $? 22 + ceph fs new $FS_NAME mds-tier mds-tier 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-tier mds-tier --force 2>$TMPFILE + check_response 'in use as a cache tier' $? 
22 + set -e + + # Clean up tier + EC pools + ceph osd tier remove-overlay mds-ec-pool + ceph osd tier remove mds-ec-pool mds-tier + + # Create a FS using the 'cache' pool now that it's no longer a tier + ceph fs new $FS_NAME fs_metadata mds-tier --force + + # We should be forbidden from using this pool as a tier now that + # it's in use for CephFS + set +e + ceph osd tier add mds-ec-pool mds-tier 2>$TMPFILE + check_response 'in use by CephFS' $? 16 + set -e + + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + # We should be permitted to use an EC pool with overwrites enabled + # as the data pool... + ceph osd pool set mds-ec-pool allow_ec_overwrites true + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force 2>$TMPFILE + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + # ...but not as the metadata pool + set +e + ceph fs new $FS_NAME mds-ec-pool fs_data 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-ec-pool fs_data --force 2>$TMPFILE + check_response 'erasure-code' $? 22 + set -e + + ceph osd pool delete mds-ec-pool mds-ec-pool --yes-i-really-really-mean-it + + # Create a FS and check that we can subsequently add a cache tier to it + ceph fs new $FS_NAME fs_metadata fs_data --force + + # Adding overlay to FS pool should be permitted, RADOS clients handle this. + ceph osd tier add fs_metadata mds-tier + ceph osd tier cache-mode mds-tier writeback + ceph osd tier set-overlay fs_metadata mds-tier + + # Removing tier should be permitted because the underlying pool is + # replicated (#11504 case) + ceph osd tier cache-mode mds-tier proxy + ceph osd tier remove-overlay fs_metadata + ceph osd tier remove fs_metadata mds-tier + ceph osd pool delete mds-tier mds-tier --yes-i-really-really-mean-it + + # Clean up FS + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + + + ceph mds stat + # ceph mds tell mds.a getmap + # ceph mds rm + # ceph mds rmfailed + # ceph mds set_state + + ceph osd pool delete fs_data fs_data --yes-i-really-really-mean-it + ceph osd pool delete fs_metadata fs_metadata --yes-i-really-really-mean-it +} + +function test_mon_mds_metadata() +{ + local nmons=$(ceph tell 'mon.*' version | grep -c 'version') + test "$nmons" -gt 0 + + ceph fs dump | + sed -nEe "s/^([0-9]+):.*'([a-z])' mds\\.([0-9]+)\\..*/\\1 \\2 \\3/p" | + while read gid id rank; do + ceph mds metadata ${gid} | grep '"hostname":' + ceph mds metadata ${id} | grep '"hostname":' + ceph mds metadata ${rank} | grep '"hostname":' + + local n=$(ceph tell 'mon.*' mds metadata ${id} | grep -c '"hostname":') + test "$n" -eq "$nmons" + done + + expect_false ceph mds metadata UNKNOWN +} + +function test_mon_mon() +{ + # print help message + ceph --help mon + # -h works even when some arguments are passed + ceph osd dump -h | grep 'osd dump' + ceph osd dump 123 -h | grep 'osd dump' + # no mon add/remove + ceph mon dump + ceph mon getmap -o $TEMP_DIR/monmap.$$ + [ -s $TEMP_DIR/monmap.$$ ] + + # ceph mon tell + first=$(ceph mon dump -f json | jq -r '.mons[0].name') + ceph tell mon.$first mon_status + + # test mon features + ceph mon feature ls + ceph mon feature set kraken --yes-i-really-mean-it + expect_false ceph mon feature set abcd + expect_false ceph mon feature set abcd --yes-i-really-mean-it + + # test elector + expect_failure $TEMP_DIR ceph mon add disallowed_leader $first + ceph mon set election_strategy disallow + ceph mon add disallowed_leader $first + ceph mon set election_strategy connectivity + ceph mon rm 
disallowed_leader $first + ceph mon set election_strategy classic + expect_failure $TEMP_DIR ceph mon rm disallowed_leader $first + + # test mon stat + # don't check output, just ensure it does not fail. + ceph mon stat + ceph mon stat -f json | jq '.' +} + +function test_mon_priority_and_weight() +{ + for i in 0 1 65535; do + ceph mon set-weight a $i + w=$(ceph mon dump --format=json-pretty 2>/dev/null | jq '.mons[0].weight') + [[ "$w" == "$i" ]] + done + + for i in -1 65536; do + expect_false ceph mon set-weight a $i + done +} + +function gen_secrets_file() +{ + # lets assume we can have the following types + # all - generates both cephx and lockbox, with mock dm-crypt key + # cephx - only cephx + # no_cephx - lockbox and dm-crypt, no cephx + # no_lockbox - dm-crypt and cephx, no lockbox + # empty - empty file + # empty_json - correct json, empty map + # bad_json - bad json :) + # + local t=$1 + if [[ -z "$t" ]]; then + t="all" + fi + + fn=$(mktemp $TEMP_DIR/secret.XXXXXX) + echo $fn + if [[ "$t" == "empty" ]]; then + return 0 + fi + + echo "{" > $fn + if [[ "$t" == "bad_json" ]]; then + echo "asd: ; }" >> $fn + return 0 + elif [[ "$t" == "empty_json" ]]; then + echo "}" >> $fn + return 0 + fi + + cephx_secret="\"cephx_secret\": \"$(ceph-authtool --gen-print-key)\"" + lb_secret="\"cephx_lockbox_secret\": \"$(ceph-authtool --gen-print-key)\"" + dmcrypt_key="\"dmcrypt_key\": \"$(ceph-authtool --gen-print-key)\"" + + if [[ "$t" == "all" ]]; then + echo "$cephx_secret,$lb_secret,$dmcrypt_key" >> $fn + elif [[ "$t" == "cephx" ]]; then + echo "$cephx_secret" >> $fn + elif [[ "$t" == "no_cephx" ]]; then + echo "$lb_secret,$dmcrypt_key" >> $fn + elif [[ "$t" == "no_lockbox" ]]; then + echo "$cephx_secret,$dmcrypt_key" >> $fn + else + echo "unknown gen_secrets_file() type \'$fn\'" + return 1 + fi + echo "}" >> $fn + return 0 +} + +function test_mon_osd_create_destroy() +{ + ceph osd new 2>&1 | grep 'EINVAL' + ceph osd new '' -1 2>&1 | grep 'EINVAL' + ceph osd new '' 10 2>&1 | grep 'EINVAL' + + old_maxosd=$(ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//') + + old_osds=$(ceph osd ls) + num_osds=$(ceph osd ls | wc -l) + + uuid=$(uuidgen) + id=$(ceph osd new $uuid 2>/dev/null) + + for i in $old_osds; do + [[ "$i" != "$id" ]] + done + + ceph osd find $id + + id2=`ceph osd new $uuid 2>/dev/null` + + [[ $id2 == $id ]] + + ceph osd new $uuid $id + + id3=$(ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//') + ceph osd new $uuid $((id3+1)) 2>&1 | grep EEXIST + + uuid2=$(uuidgen) + id2=$(ceph osd new $uuid2) + ceph osd find $id2 + [[ "$id2" != "$id" ]] + + ceph osd new $uuid $id2 2>&1 | grep EEXIST + ceph osd new $uuid2 $id2 + + # test with secrets + empty_secrets=$(gen_secrets_file "empty") + empty_json=$(gen_secrets_file "empty_json") + all_secrets=$(gen_secrets_file "all") + cephx_only=$(gen_secrets_file "cephx") + no_cephx=$(gen_secrets_file "no_cephx") + no_lockbox=$(gen_secrets_file "no_lockbox") + bad_json=$(gen_secrets_file "bad_json") + + # empty secrets should be idempotent + new_id=$(ceph osd new $uuid $id -i $empty_secrets) + [[ "$new_id" == "$id" ]] + + # empty json, thus empty secrets + new_id=$(ceph osd new $uuid $id -i $empty_json) + [[ "$new_id" == "$id" ]] + + ceph osd new $uuid $id -i $all_secrets 2>&1 | grep 'EEXIST' + + ceph osd rm $id + ceph osd rm $id2 + ceph osd setmaxosd $old_maxosd + + ceph osd new $uuid -i $no_cephx 2>&1 | grep 'EINVAL' + ceph osd new $uuid -i $no_lockbox 2>&1 | grep 'EINVAL' + + osds=$(ceph osd ls) + id=$(ceph osd new $uuid -i 
$all_secrets) + for i in $osds; do + [[ "$i" != "$id" ]] + done + + ceph osd find $id + + # validate secrets and dm-crypt are set + k=$(ceph auth get-key osd.$id --format=json-pretty 2>/dev/null | jq '.key') + s=$(cat $all_secrets | jq '.cephx_secret') + [[ $k == $s ]] + k=$(ceph auth get-key client.osd-lockbox.$uuid --format=json-pretty 2>/dev/null | \ + jq '.key') + s=$(cat $all_secrets | jq '.cephx_lockbox_secret') + [[ $k == $s ]] + ceph config-key exists dm-crypt/osd/$uuid/luks + + osds=$(ceph osd ls) + id2=$(ceph osd new $uuid2 -i $cephx_only) + for i in $osds; do + [[ "$i" != "$id2" ]] + done + + ceph osd find $id2 + k=$(ceph auth get-key osd.$id --format=json-pretty 2>/dev/null | jq '.key') + s=$(cat $all_secrets | jq '.cephx_secret') + [[ $k == $s ]] + expect_false ceph auth get-key client.osd-lockbox.$uuid2 + expect_false ceph config-key exists dm-crypt/osd/$uuid2/luks + + ceph osd destroy osd.$id2 --yes-i-really-mean-it + ceph osd destroy $id2 --yes-i-really-mean-it + ceph osd find $id2 + expect_false ceph auth get-key osd.$id2 + ceph osd dump | grep osd.$id2 | grep destroyed + + id3=$id2 + uuid3=$(uuidgen) + ceph osd new $uuid3 $id3 -i $all_secrets + ceph osd dump | grep osd.$id3 | expect_false grep destroyed + ceph auth get-key client.osd-lockbox.$uuid3 + ceph auth get-key osd.$id3 + ceph config-key exists dm-crypt/osd/$uuid3/luks + + ceph osd purge-new osd.$id3 --yes-i-really-mean-it + expect_false ceph osd find $id2 + expect_false ceph auth get-key osd.$id2 + expect_false ceph auth get-key client.osd-lockbox.$uuid3 + expect_false ceph config-key exists dm-crypt/osd/$uuid3/luks + ceph osd purge osd.$id3 --yes-i-really-mean-it + ceph osd purge-new osd.$id3 --yes-i-really-mean-it # idempotent + + ceph osd purge osd.$id --yes-i-really-mean-it + ceph osd purge 123456 --yes-i-really-mean-it + expect_false ceph osd find $id + expect_false ceph auth get-key osd.$id + expect_false ceph auth get-key client.osd-lockbox.$uuid + expect_false ceph config-key exists dm-crypt/osd/$uuid/luks + + rm $empty_secrets $empty_json $all_secrets $cephx_only \ + $no_cephx $no_lockbox $bad_json + + for i in $(ceph osd ls); do + [[ "$i" != "$id" ]] + [[ "$i" != "$id2" ]] + [[ "$i" != "$id3" ]] + done + + [[ "$(ceph osd ls | wc -l)" == "$num_osds" ]] + ceph osd setmaxosd $old_maxosd + +} + +function test_mon_config_key() +{ + key=asdfasdfqwerqwreasdfuniquesa123df + ceph config-key list | grep -c $key | grep 0 + ceph config-key get $key | grep -c bar | grep 0 + ceph config-key set $key bar + ceph config-key get $key | grep bar + ceph config-key list | grep -c $key | grep 1 + ceph config-key dump | grep $key | grep bar + ceph config-key rm $key + expect_false ceph config-key get $key + ceph config-key list | grep -c $key | grep 0 + ceph config-key dump | grep -c $key | grep 0 +} + +function test_mon_osd() +{ + # + # osd blocklist + # + bl=192.168.0.1:0/1000 + ceph osd blocklist add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist ls --format=json-pretty | sed 's/\\\//\//' | grep $bl + ceph osd dump --format=json-pretty | grep $bl + ceph osd dump | grep $bl + ceph osd blocklist rm $bl + ceph osd blocklist ls | expect_false grep $bl + + bl=192.168.0.1 + # test without nonce, invalid nonce + ceph osd blocklist add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist rm $bl + ceph osd blocklist ls | expect_false grep $bl + expect_false "ceph osd blocklist add $bl/-1" + expect_false "ceph osd blocklist add $bl/foo" + + # test with invalid address + expect_false "ceph osd blocklist add 
1234.56.78.90/100" + + # test range blocklisting + bl=192.168.0.1:0/24 + ceph osd blocklist range add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist range rm $bl + ceph osd blocklist ls | expect_false grep $bl + bad_bl=192.168.0.1/33 + expect_false ceph osd blocklist range add $bad_bl + + # Test `clear` + ceph osd blocklist add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist clear + ceph osd blocklist ls | expect_false grep $bl + + # deprecated syntax? + ceph osd blacklist ls + + # + # osd crush + # + ceph osd crush reweight-all + ceph osd crush tunables legacy + ceph osd crush show-tunables | grep argonaut + ceph osd crush tunables bobtail + ceph osd crush show-tunables | grep bobtail + ceph osd crush tunables firefly + ceph osd crush show-tunables | grep firefly + + ceph osd crush set-tunable straw_calc_version 0 + ceph osd crush get-tunable straw_calc_version | grep 0 + ceph osd crush set-tunable straw_calc_version 1 + ceph osd crush get-tunable straw_calc_version | grep 1 + + # + # require-min-compat-client + expect_false ceph osd set-require-min-compat-client dumpling # firefly tunables + ceph osd get-require-min-compat-client | grep luminous + ceph osd dump | grep 'require_min_compat_client luminous' + + # + # osd scrub + # + + # blocking + ceph osd scrub 0 --block + ceph osd deep-scrub 0 --block + + # how do I tell when these are done? + ceph osd scrub 0 + ceph osd deep-scrub 0 + ceph osd repair 0 + + # pool scrub, force-recovery/backfill + pool_names=`rados lspools` + for pool_name in $pool_names + do + ceph osd pool scrub $pool_name + ceph osd pool deep-scrub $pool_name + ceph osd pool repair $pool_name + ceph osd pool force-recovery $pool_name + ceph osd pool cancel-force-recovery $pool_name + ceph osd pool force-backfill $pool_name + ceph osd pool cancel-force-backfill $pool_name + done + + for f in noup nodown noin noout noscrub nodeep-scrub nobackfill \ + norebalance norecover notieragent noautoscale + do + ceph osd set $f + ceph osd unset $f + done + expect_false ceph osd set bogus + expect_false ceph osd unset bogus + for f in sortbitwise recover_deletes require_jewel_osds \ + require_kraken_osds + do + expect_false ceph osd set $f + expect_false ceph osd unset $f + done + ceph osd require-osd-release reef + # can't lower + expect_false ceph osd require-osd-release quincy + expect_false ceph osd require-osd-release pacific + # these are no-ops but should succeed. + + ceph osd set noup + ceph osd down 0 + ceph osd dump | grep 'osd.0 down' + ceph osd unset noup + max_run=1000 + for ((i=0; i < $max_run; i++)); do + if ! ceph osd dump | grep 'osd.0 up'; then + echo "waiting for osd.0 to come back up ($i/$max_run)" + sleep 1 + else + break + fi + done + ceph osd dump | grep 'osd.0 up' + + ceph osd dump | grep 'osd.0 up' + # ceph osd find expects the OsdName, so both ints and osd.n should work. 
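+  # For example, both the bare id "1" and the name "osd.1" are accepted by the
+  # checks below, while malformed names such as "osd.xyz", "xyz" and "0.1" are
+  # rejected via expect_false.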
+ ceph osd find 1 + ceph osd find osd.1 + expect_false ceph osd find osd.xyz + expect_false ceph osd find xyz + expect_false ceph osd find 0.1 + ceph --format plain osd find 1 # falls back to json-pretty + if [ `uname` == Linux ]; then + ceph osd metadata 1 | grep 'distro' + ceph --format plain osd metadata 1 | grep 'distro' # falls back to json-pretty + fi + ceph osd out 0 + ceph osd dump | grep 'osd.0.*out' + ceph osd in 0 + ceph osd dump | grep 'osd.0.*in' + ceph osd find 0 + + ceph osd info 0 + ceph osd info osd.0 + expect_false ceph osd info osd.xyz + expect_false ceph osd info xyz + expect_false ceph osd info 42 + expect_false ceph osd info osd.42 + + ceph osd info + info_json=$(ceph osd info --format=json | jq -cM '.') + dump_json=$(ceph osd dump --format=json | jq -cM '.osds') + if [[ "${info_json}" != "${dump_json}" ]]; then + echo "waiting for OSDs to settle" + sleep 10 + info_json=$(ceph osd info --format=json | jq -cM '.') + dump_json=$(ceph osd dump --format=json | jq -cM '.osds') + [[ "${info_json}" == "${dump_json}" ]] + fi + + info_json=$(ceph osd info 0 --format=json | jq -cM '.') + dump_json=$(ceph osd dump --format=json | \ + jq -cM '.osds[] | select(.osd == 0)') + [[ "${info_json}" == "${dump_json}" ]] + + info_plain="$(ceph osd info)" + dump_plain="$(ceph osd dump | grep '^osd')" + [[ "${info_plain}" == "${dump_plain}" ]] + + info_plain="$(ceph osd info 0)" + dump_plain="$(ceph osd dump | grep '^osd.0')" + [[ "${info_plain}" == "${dump_plain}" ]] + + ceph osd add-nodown 0 1 + ceph health detail | grep 'NODOWN' + ceph osd rm-nodown 0 1 + ! ceph health detail | grep 'NODOWN' + + ceph osd out 0 # so we can mark it as noin later + ceph osd add-noin 0 + ceph health detail | grep 'NOIN' + ceph osd rm-noin 0 + ! ceph health detail | grep 'NOIN' + ceph osd in 0 + + ceph osd add-noout 0 + ceph health detail | grep 'NOOUT' + ceph osd rm-noout 0 + ! ceph health detail | grep 'NOOUT' + + # test osd id parse + expect_false ceph osd add-noup 797er + expect_false ceph osd add-nodown u9uwer + expect_false ceph osd add-noin 78~15 + + expect_false ceph osd rm-noup 1234567 + expect_false ceph osd rm-nodown fsadf7 + expect_false ceph osd rm-noout 790-fd + + ids=`ceph osd ls-tree default` + for osd in $ids + do + ceph osd add-nodown $osd + ceph osd add-noout $osd + done + ceph -s | grep 'NODOWN' + ceph -s | grep 'NOOUT' + ceph osd rm-nodown any + ceph osd rm-noout all + ! ceph -s | grep 'NODOWN' + ! 
ceph -s | grep 'NOOUT' + + # test crush node flags + ceph osd add-noup osd.0 + ceph osd add-nodown osd.0 + ceph osd add-noin osd.0 + ceph osd add-noout osd.0 + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep "osd.0" + ceph osd rm-noup osd.0 + ceph osd rm-nodown osd.0 + ceph osd rm-noin osd.0 + ceph osd rm-noout osd.0 + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep "osd.0" + + ceph osd crush add-bucket foo host root=default + ceph osd add-noup foo + ceph osd add-nodown foo + ceph osd add-noin foo + ceph osd add-noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | grep foo + ceph osd rm-noup foo + ceph osd rm-nodown foo + ceph osd rm-noin foo + ceph osd rm-noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep foo + ceph osd add-noup foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | grep foo + ceph osd crush rm foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep foo + + ceph osd set-group noup osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd set-group noup,nodown osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd set-group noup,nodown,noin osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd set-group noup,nodown,noin,noout osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup,nodown osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup\|nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup,nodown,noin osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup\|nodown\|noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noup,nodown,noin,noout osd.0 osd.1 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout osd.0 osd.1 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 
'noup\|nodown\|noin\|noout' + ceph osd dump -f json-pretty | jq ".osds[1].state" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noup all + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd unset-group noup all + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup' + + # crush node flags + ceph osd crush add-bucket foo host root=default + ceph osd set-group noup foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd set-group noup,nodown foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd set-group noup,nodown,noin foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd set-group noup,nodown,noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + + ceph osd unset-group noup foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noup,nodown foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup\|nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noup,nodown,noin foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup\|nodown\|noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep 'foo' + + ceph osd set-group noup,nodown,noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd crush rm foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep 'foo' + + # test device class flags + osd_0_device_class=$(ceph osd crush get-device-class osd.0) + ceph osd set-group noup $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd set-group noup,nodown $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' 
+ ceph osd set-group noup,nodown,noin $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd set-group noup,nodown,noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + + ceph osd unset-group noup $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown,noin $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown\|noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep $osd_0_device_class + + # make sure mark out preserves weight + ceph osd reweight osd.0 .5 + ceph osd dump | grep ^osd.0 | grep 'weight 0.5' + ceph osd out 0 + ceph osd in 0 + ceph osd dump | grep ^osd.0 | grep 'weight 0.5' + + ceph osd getmap -o $f + [ -s $f ] + rm $f + save=$(ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//') + [ "$save" -gt 0 ] + ceph osd setmaxosd $((save - 1)) 2>&1 | grep 'EBUSY' + ceph osd setmaxosd 10 + ceph osd getmaxosd | grep 'max_osd = 10' + ceph osd setmaxosd $save + ceph osd getmaxosd | grep "max_osd = $save" + + for id in `ceph osd ls` ; do + retry_eagain 5 map_enxio_to_eagain ceph tell osd.$id version + done + + ceph osd rm 0 2>&1 | grep 'EBUSY' + + local old_osds=$(echo $(ceph osd ls)) + id=`ceph osd create` + ceph osd find $id + ceph osd lost $id --yes-i-really-mean-it + expect_false ceph osd setmaxosd $id + local new_osds=$(echo $(ceph osd ls)) + for id in $(echo $new_osds | sed -e "s/$old_osds//") ; do + ceph osd rm $id + done + + uuid=`uuidgen` + id=`ceph osd create $uuid` + id2=`ceph osd create $uuid` + [ "$id" = "$id2" ] + ceph osd rm $id + + ceph --help osd + + # reset max_osd. 
+ ceph osd setmaxosd $id + ceph osd getmaxosd | grep "max_osd = $save" + local max_osd=$save + + ceph osd create $uuid 0 2>&1 | grep 'EINVAL' + ceph osd create $uuid $((max_osd - 1)) 2>&1 | grep 'EINVAL' + + id=`ceph osd create $uuid $max_osd` + [ "$id" = "$max_osd" ] + ceph osd find $id + max_osd=$((max_osd + 1)) + ceph osd getmaxosd | grep "max_osd = $max_osd" + + ceph osd create $uuid $((id - 1)) 2>&1 | grep 'EEXIST' + ceph osd create $uuid $((id + 1)) 2>&1 | grep 'EEXIST' + id2=`ceph osd create $uuid` + [ "$id" = "$id2" ] + id2=`ceph osd create $uuid $id` + [ "$id" = "$id2" ] + + uuid=`uuidgen` + local gap_start=$max_osd + id=`ceph osd create $uuid $((gap_start + 100))` + [ "$id" = "$((gap_start + 100))" ] + max_osd=$((id + 1)) + ceph osd getmaxosd | grep "max_osd = $max_osd" + + ceph osd create $uuid $gap_start 2>&1 | grep 'EEXIST' + + # + # When CEPH_CLI_TEST_DUP_COMMAND is set, osd create + # is repeated and consumes two osd id, not just one. + # + local next_osd=$gap_start + id=`ceph osd create $(uuidgen)` + [ "$id" = "$next_osd" ] + + next_osd=$((id + 1)) + id=`ceph osd create $(uuidgen) $next_osd` + [ "$id" = "$next_osd" ] + + local new_osds=$(echo $(ceph osd ls)) + for id in $(echo $new_osds | sed -e "s/$old_osds//") ; do + [ $id -ge $save ] + ceph osd rm $id + done + ceph osd setmaxosd $save + + ceph osd ls + ceph osd pool create data 16 + ceph osd pool application enable data rados + ceph osd lspools | grep data + ceph osd map data foo | grep 'pool.*data.*object.*foo.*pg.*up.*acting' + ceph osd map data foo namespace| grep 'pool.*data.*object.*namespace/foo.*pg.*up.*acting' + ceph osd pool delete data data --yes-i-really-really-mean-it + + ceph osd pause + ceph osd dump | grep 'flags.*pauserd,pausewr' + ceph osd unpause + + ceph osd tree + ceph osd tree up + ceph osd tree down + ceph osd tree in + ceph osd tree out + ceph osd tree destroyed + ceph osd tree up in + ceph osd tree up out + ceph osd tree down in + ceph osd tree down out + ceph osd tree out down + expect_false ceph osd tree up down + expect_false ceph osd tree up destroyed + expect_false ceph osd tree down destroyed + expect_false ceph osd tree up down destroyed + expect_false ceph osd tree in out + expect_false ceph osd tree up foo + + ceph osd metadata + ceph osd count-metadata os + ceph osd versions + + ceph osd perf + ceph osd blocked-by + + ceph osd stat | grep up +} + +function test_mon_crush() +{ + f=$TEMP_DIR/map.$$ + epoch=$(ceph osd getcrushmap -o $f 2>&1 | tail -n1) + [ -s $f ] + [ "$epoch" -gt 1 ] + nextepoch=$(( $epoch + 1 )) + echo epoch $epoch nextepoch $nextepoch + rm -f $f.epoch + expect_false ceph osd setcrushmap $nextepoch -i $f + gotepoch=$(ceph osd setcrushmap $epoch -i $f 2>&1 | tail -n1) + echo gotepoch $gotepoch + [ "$gotepoch" -eq "$nextepoch" ] + # should be idempotent + gotepoch=$(ceph osd setcrushmap $epoch -i $f 2>&1 | tail -n1) + echo epoch $gotepoch + [ "$gotepoch" -eq "$nextepoch" ] + rm $f +} + +function test_mon_osd_pool() +{ + # + # osd pool + # + ceph osd pool create data 16 + ceph osd pool application enable data rados + ceph osd pool mksnap data datasnap + rados -p data lssnap | grep datasnap + ceph osd pool rmsnap data datasnap + expect_false ceph osd pool rmsnap pool_fake snapshot + ceph osd pool delete data data --yes-i-really-really-mean-it + + ceph osd pool create data2 16 + ceph osd pool application enable data2 rados + ceph osd pool rename data2 data3 + ceph osd lspools | grep data3 + ceph osd pool delete data3 data3 --yes-i-really-really-mean-it + + ceph osd pool create 
replicated 16 16 replicated + ceph osd pool create replicated 1 16 replicated + ceph osd pool create replicated 16 16 # default is replicated + ceph osd pool create replicated 16 # default is replicated, pgp_num = pg_num + ceph osd pool application enable replicated rados + # should fail because the type is not the same + expect_false ceph osd pool create replicated 16 16 erasure + ceph osd lspools | grep replicated + ceph osd pool create ec_test 1 1 erasure + ceph osd pool application enable ec_test rados + set +e + ceph osd count-metadata osd_objectstore | grep 'bluestore' + if [ $? -eq 1 ]; then # enable ec_overwrites on non-bluestore pools should fail + ceph osd pool set ec_test allow_ec_overwrites true >& $TMPFILE + check_response "pool must only be stored on bluestore for scrubbing to work" $? 22 + else + ceph osd pool set ec_test allow_ec_overwrites true || return 1 + expect_false ceph osd pool set ec_test allow_ec_overwrites false + fi + set -e + ceph osd pool delete replicated replicated --yes-i-really-really-mean-it + ceph osd pool delete ec_test ec_test --yes-i-really-really-mean-it + + # test create pool with rule + ceph osd erasure-code-profile set foo foo + ceph osd erasure-code-profile ls | grep foo + ceph osd crush rule create-erasure foo foo + ceph osd pool create erasure 16 16 erasure foo + expect_false ceph osd erasure-code-profile rm foo + ceph osd pool delete erasure erasure --yes-i-really-really-mean-it + ceph osd crush rule rm foo + ceph osd erasure-code-profile rm foo + + # autoscale mode + ceph osd pool create modeon --autoscale-mode=on + ceph osd dump | grep modeon | grep 'autoscale_mode on' + ceph osd pool create modewarn --autoscale-mode=warn + ceph osd dump | grep modewarn | grep 'autoscale_mode warn' + ceph osd pool create modeoff --autoscale-mode=off + ceph osd dump | grep modeoff | grep 'autoscale_mode off' + ceph osd pool delete modeon modeon --yes-i-really-really-mean-it + ceph osd pool delete modewarn modewarn --yes-i-really-really-mean-it + ceph osd pool delete modeoff modeoff --yes-i-really-really-mean-it +} + +function test_mon_osd_pool_quota() +{ + # + # test osd pool set/get quota + # + + # create tmp pool + ceph osd pool create tmp-quota-pool 32 + ceph osd pool application enable tmp-quota-pool rados + # + # set erroneous quotas + # + expect_false ceph osd pool set-quota tmp-quota-pool max_fooness 10 + expect_false ceph osd pool set-quota tmp-quota-pool max_bytes -1 + expect_false ceph osd pool set-quota tmp-quota-pool max_objects aaa + # + # set valid quotas + # + ceph osd pool set-quota tmp-quota-pool max_bytes 10 + ceph osd pool set-quota tmp-quota-pool max_objects 10M + # + # get quotas in json-pretty format + # + ceph osd pool get-quota tmp-quota-pool --format=json-pretty | \ + grep '"quota_max_objects":.*10000000' + ceph osd pool get-quota tmp-quota-pool --format=json-pretty | \ + grep '"quota_max_bytes":.*10' + # + # get quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10 B' + ceph osd pool get-quota tmp-quota-pool | grep 'max objects.*10.*M objects' + # + # set valid quotas with unit prefix + # + ceph osd pool set-quota tmp-quota-pool max_bytes 10K + # + # get quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10 Ki' + # + # set valid quotas with unit prefix + # + ceph osd pool set-quota tmp-quota-pool max_bytes 10Ki + # + # get quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10 Ki' + # + # + # reset pool quotas + # + ceph osd pool set-quota tmp-quota-pool max_bytes 0 + ceph 
osd pool set-quota tmp-quota-pool max_objects 0 + # + # test N/A quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*N/A' + ceph osd pool get-quota tmp-quota-pool | grep 'max objects.*N/A' + # + # cleanup tmp pool + ceph osd pool delete tmp-quota-pool tmp-quota-pool --yes-i-really-really-mean-it +} + +function test_mon_pg() +{ + # Make sure we start healthy. + wait_for_health_ok + + ceph pg debug unfound_objects_exist + ceph pg debug degraded_pgs_exist + ceph pg deep-scrub 1.0 + ceph pg dump + ceph pg dump pgs_brief --format=json + ceph pg dump pgs --format=json + ceph pg dump pools --format=json + ceph pg dump osds --format=json + ceph pg dump sum --format=json + ceph pg dump all --format=json + ceph pg dump pgs_brief osds --format=json + ceph pg dump pools osds pgs_brief --format=json + ceph pg dump_json + ceph pg dump_pools_json + ceph pg dump_stuck inactive + ceph pg dump_stuck unclean + ceph pg dump_stuck stale + ceph pg dump_stuck undersized + ceph pg dump_stuck degraded + ceph pg ls + ceph pg ls 1 + ceph pg ls stale + expect_false ceph pg ls scrubq + ceph pg ls active stale repair recovering + ceph pg ls 1 active + ceph pg ls 1 active stale + ceph pg ls-by-primary osd.0 + ceph pg ls-by-primary osd.0 1 + ceph pg ls-by-primary osd.0 active + ceph pg ls-by-primary osd.0 active stale + ceph pg ls-by-primary osd.0 1 active stale + ceph pg ls-by-osd osd.0 + ceph pg ls-by-osd osd.0 1 + ceph pg ls-by-osd osd.0 active + ceph pg ls-by-osd osd.0 active stale + ceph pg ls-by-osd osd.0 1 active stale + ceph pg ls-by-pool rbd + ceph pg ls-by-pool rbd active stale + # can't test this... + # ceph pg force_create_pg + ceph pg getmap -o $TEMP_DIR/map.$$ + [ -s $TEMP_DIR/map.$$ ] + ceph pg map 1.0 | grep acting + ceph pg repair 1.0 + ceph pg scrub 1.0 + + ceph osd set-full-ratio .962 + ceph osd dump | grep '^full_ratio 0.962' + ceph osd set-backfillfull-ratio .912 + ceph osd dump | grep '^backfillfull_ratio 0.912' + ceph osd set-nearfull-ratio .892 + ceph osd dump | grep '^nearfull_ratio 0.892' + + # Check health status + ceph osd set-nearfull-ratio .913 + ceph health -f json | grep OSD_OUT_OF_ORDER_FULL + ceph health detail | grep OSD_OUT_OF_ORDER_FULL + ceph osd set-nearfull-ratio .892 + ceph osd set-backfillfull-ratio .963 + ceph health -f json | grep OSD_OUT_OF_ORDER_FULL + ceph health detail | grep OSD_OUT_OF_ORDER_FULL + ceph osd set-backfillfull-ratio .912 + + # Check injected full results + $SUDO ceph tell osd.0 injectfull nearfull + wait_for_health "OSD_NEARFULL" + ceph health detail | grep "osd.0 is near full" + $SUDO ceph tell osd.0 injectfull none + wait_for_health_ok + + $SUDO ceph tell osd.1 injectfull backfillfull + wait_for_health "OSD_BACKFILLFULL" + ceph health detail | grep "osd.1 is backfill full" + $SUDO ceph tell osd.1 injectfull none + wait_for_health_ok + + $SUDO ceph tell osd.2 injectfull failsafe + # failsafe and full are the same as far as the monitor is concerned + wait_for_health "OSD_FULL" + ceph health detail | grep "osd.2 is full" + $SUDO ceph tell osd.2 injectfull none + wait_for_health_ok + + $SUDO ceph tell osd.0 injectfull full + wait_for_health "OSD_FULL" + ceph health detail | grep "osd.0 is full" + $SUDO ceph tell osd.0 injectfull none + wait_for_health_ok + + ceph pg stat | grep 'pgs:' + ceph pg 1.0 query + ceph tell 1.0 query + first=$(ceph mon dump -f json | jq -r '.mons[0].name') + ceph tell mon.$first quorum enter + ceph quorum_status + ceph report | grep osd_stats + ceph status + ceph -s + + # + # tell osd version + # + ceph tell osd.0 
version + expect_false ceph tell osd.9999 version + expect_false ceph tell osd.foo version + + # back to pg stuff + + ceph tell osd.0 dump_pg_recovery_stats | grep Started + + ceph osd reweight 0 0.9 + expect_false ceph osd reweight 0 -1 + ceph osd reweight osd.0 1 + + ceph osd primary-affinity osd.0 .9 + expect_false ceph osd primary-affinity osd.0 -2 + expect_false ceph osd primary-affinity osd.9999 .5 + ceph osd primary-affinity osd.0 1 + + ceph osd pool set rbd size 2 + ceph osd pg-temp 1.0 0 1 + ceph osd pg-temp 1.0 osd.1 osd.0 + expect_false ceph osd pg-temp 1.0 0 1 2 + expect_false ceph osd pg-temp asdf qwer + expect_false ceph osd pg-temp 1.0 asdf + ceph osd pg-temp 1.0 # cleanup pg-temp + + ceph pg repeer 1.0 + expect_false ceph pg repeer 0.0 # pool 0 shouldn't exist anymore + + # don't test ceph osd primary-temp for now +} + +function test_mon_osd_pool_set() +{ + TEST_POOL_GETSET=pool_getset + expect_false ceph osd pool create $TEST_POOL_GETSET 1 --target_size_ratio -0.3 + expect_true ceph osd pool create $TEST_POOL_GETSET 1 --target_size_ratio 1 + ceph osd pool application enable $TEST_POOL_GETSET rados + ceph osd pool set $TEST_POOL_GETSET pg_autoscale_mode off + wait_for_clean + ceph osd pool get $TEST_POOL_GETSET all + + for s in pg_num pgp_num size min_size crush_rule target_size_ratio; do + ceph osd pool get $TEST_POOL_GETSET $s + done + + old_size=$(ceph osd pool get $TEST_POOL_GETSET size | sed -e 's/size: //') + (( new_size = old_size + 1 )) + ceph osd pool set $TEST_POOL_GETSET size $new_size --yes-i-really-mean-it + ceph osd pool get $TEST_POOL_GETSET size | grep "size: $new_size" + ceph osd pool set $TEST_POOL_GETSET size $old_size --yes-i-really-mean-it + + ceph osd pool create pool_erasure 1 1 erasure + ceph osd pool application enable pool_erasure rados + wait_for_clean + set +e + ceph osd pool set pool_erasure size 4444 2>$TMPFILE + check_response 'not change the size' + set -e + ceph osd pool get pool_erasure erasure_code_profile + ceph osd pool rm pool_erasure pool_erasure --yes-i-really-really-mean-it + + for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub bulk; do + ceph osd pool set $TEST_POOL_GETSET $flag false + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: false" + ceph osd pool set $TEST_POOL_GETSET $flag true + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: true" + ceph osd pool set $TEST_POOL_GETSET $flag 1 + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: true" + ceph osd pool set $TEST_POOL_GETSET $flag 0 + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: false" + expect_false ceph osd pool set $TEST_POOL_GETSET $flag asdf + expect_false ceph osd pool set $TEST_POOL_GETSET $flag 2 + done + + ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET scrub_min_interval 123456 + ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | grep 'scrub_min_interval: 123456' + ceph osd pool set $TEST_POOL_GETSET scrub_min_interval 0 + ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET scrub_max_interval 123456 + ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | grep 'scrub_max_interval: 123456' + ceph osd pool set $TEST_POOL_GETSET scrub_max_interval 0 + ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | expect_false grep '.' 
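+
+    # Editorial sketch (not part of the original test): the interval checks
+    # here all follow the same round-trip -- the per-pool value is absent by
+    # default (so "get" prints nothing), a non-zero "set" makes it show up,
+    # and setting it back to 0 clears it again. A minimal helper capturing
+    # that pattern; the name check_pool_interval_roundtrip is hypothetical
+    # and the function is only defined here, never called:
+    check_pool_interval_roundtrip() {
+        local pool=$1 opt=$2 val=$3
+        # value starts cleared: "get" prints nothing
+        ceph osd pool get $pool $opt | expect_false grep '.'
+        # set a value and verify it is reported back
+        ceph osd pool set $pool $opt $val
+        ceph osd pool get $pool $opt | grep "$opt: $val"
+        # setting 0 clears the per-pool value again
+        ceph osd pool set $pool $opt 0
+        ceph osd pool get $pool $opt | expect_false grep '.'
+    }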
+ + ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET deep_scrub_interval 123456 + ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | grep 'deep_scrub_interval: 123456' + ceph osd pool set $TEST_POOL_GETSET deep_scrub_interval 0 + ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET recovery_priority 5 + ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: 5' + ceph osd pool set $TEST_POOL_GETSET recovery_priority -5 + ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: -5' + ceph osd pool set $TEST_POOL_GETSET recovery_priority 0 + ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.' + expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority -11 + expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 11 + + ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5 + ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | grep 'recovery_op_priority: 5' + ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 0 + ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET scrub_priority 5 + ceph osd pool get $TEST_POOL_GETSET scrub_priority | grep 'scrub_priority: 5' + ceph osd pool set $TEST_POOL_GETSET scrub_priority 0 + ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.' + + expect_false ceph osd pool set $TEST_POOL_GETSET target_size_ratio -3 + expect_false ceph osd pool set $TEST_POOL_GETSET target_size_ratio abc + expect_true ceph osd pool set $TEST_POOL_GETSET target_size_ratio 0.1 + expect_true ceph osd pool set $TEST_POOL_GETSET target_size_ratio 1 + ceph osd pool get $TEST_POOL_GETSET target_size_ratio | grep 'target_size_ratio: 1' + + ceph osd pool set $TEST_POOL_GETSET nopgchange 1 + expect_false ceph osd pool set $TEST_POOL_GETSET pg_num 10 + expect_false ceph osd pool set $TEST_POOL_GETSET pgp_num 10 + ceph osd pool set $TEST_POOL_GETSET nopgchange 0 + ceph osd pool set $TEST_POOL_GETSET pg_num 10 + wait_for_clean + ceph osd pool set $TEST_POOL_GETSET pgp_num 10 + expect_false ceph osd pool set $TEST_POOL_GETSET pg_num 0 + expect_false ceph osd pool set $TEST_POOL_GETSET pgp_num 0 + + old_pgs=$(ceph osd pool get $TEST_POOL_GETSET pg_num | sed -e 's/pg_num: //') + new_pgs=$(($old_pgs + $(ceph osd stat --format json | jq '.num_osds') * 32)) + ceph osd pool set $TEST_POOL_GETSET pg_num $new_pgs + ceph osd pool set $TEST_POOL_GETSET pgp_num $new_pgs + wait_for_clean + + ceph osd pool set $TEST_POOL_GETSET nosizechange 1 + expect_false ceph osd pool set $TEST_POOL_GETSET size 2 + expect_false ceph osd pool set $TEST_POOL_GETSET min_size 2 + ceph osd pool set $TEST_POOL_GETSET nosizechange 0 + ceph osd pool set $TEST_POOL_GETSET size 2 + wait_for_clean + ceph osd pool set $TEST_POOL_GETSET min_size 2 + + expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 0 + ceph osd pool set $TEST_POOL_GETSET hashpspool 0 --yes-i-really-mean-it + + expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 1 + ceph osd pool set $TEST_POOL_GETSET hashpspool 1 --yes-i-really-mean-it + + ceph osd pool get rbd crush_rule | 
grep 'crush_rule: ' + + ceph osd pool get $TEST_POOL_GETSET compression_mode | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET compression_mode aggressive + ceph osd pool get $TEST_POOL_GETSET compression_mode | grep 'aggressive' + ceph osd pool set $TEST_POOL_GETSET compression_mode unset + ceph osd pool get $TEST_POOL_GETSET compression_mode | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET compression_algorithm | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET compression_algorithm zlib + ceph osd pool get $TEST_POOL_GETSET compression_algorithm | grep 'zlib' + ceph osd pool set $TEST_POOL_GETSET compression_algorithm unset + ceph osd pool get $TEST_POOL_GETSET compression_algorithm | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET compression_required_ratio | expect_false grep '.' + expect_false ceph osd pool set $TEST_POOL_GETSET compression_required_ratio 1.1 + expect_false ceph osd pool set $TEST_POOL_GETSET compression_required_ratio -.2 + ceph osd pool set $TEST_POOL_GETSET compression_required_ratio .2 + ceph osd pool get $TEST_POOL_GETSET compression_required_ratio | grep '.2' + ceph osd pool set $TEST_POOL_GETSET compression_required_ratio 0 + ceph osd pool get $TEST_POOL_GETSET compression_required_ratio | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET csum_type | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET csum_type crc32c + ceph osd pool get $TEST_POOL_GETSET csum_type | grep 'crc32c' + ceph osd pool set $TEST_POOL_GETSET csum_type unset + ceph osd pool get $TEST_POOL_GETSET csum_type | expect_false grep '.' + + for size in compression_max_blob_size compression_min_blob_size csum_max_block csum_min_block; do + ceph osd pool get $TEST_POOL_GETSET $size | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET $size 100 + ceph osd pool get $TEST_POOL_GETSET $size | grep '100' + ceph osd pool set $TEST_POOL_GETSET $size 0 + ceph osd pool get $TEST_POOL_GETSET $size | expect_false grep '.' 
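+        # note: for each of these byte-size options, setting 0 clears the
+        # value again, which is why the final "get" is expected to print
+        # nothing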
+ done + + ceph osd pool set $TEST_POOL_GETSET nodelete 1 + expect_false ceph osd pool delete $TEST_POOL_GETSET $TEST_POOL_GETSET --yes-i-really-really-mean-it + ceph osd pool set $TEST_POOL_GETSET nodelete 0 + ceph osd pool delete $TEST_POOL_GETSET $TEST_POOL_GETSET --yes-i-really-really-mean-it + +} + +function test_mon_osd_tiered_pool_set() +{ + # this is really a tier pool + ceph osd pool create real-tier 2 + ceph osd tier add rbd real-tier + + # expect us to be unable to set negative values for hit_set_* + for o in hit_set_period hit_set_count hit_set_fpp; do + expect_false ceph osd pool set real_tier $o -1 + done + + # and hit_set_fpp should be in range 0..1 + expect_false ceph osd pool set real_tier hit_set_fpp 2 + + ceph osd pool set real-tier hit_set_type explicit_hash + ceph osd pool get real-tier hit_set_type | grep "hit_set_type: explicit_hash" + ceph osd pool set real-tier hit_set_type explicit_object + ceph osd pool get real-tier hit_set_type | grep "hit_set_type: explicit_object" + ceph osd pool set real-tier hit_set_type bloom + ceph osd pool get real-tier hit_set_type | grep "hit_set_type: bloom" + expect_false ceph osd pool set real-tier hit_set_type i_dont_exist + ceph osd pool set real-tier hit_set_period 123 + ceph osd pool get real-tier hit_set_period | grep "hit_set_period: 123" + ceph osd pool set real-tier hit_set_count 12 + ceph osd pool get real-tier hit_set_count | grep "hit_set_count: 12" + ceph osd pool set real-tier hit_set_fpp .01 + ceph osd pool get real-tier hit_set_fpp | grep "hit_set_fpp: 0.01" + + ceph osd pool set real-tier target_max_objects 123 + ceph osd pool get real-tier target_max_objects | \ + grep 'target_max_objects:[ \t]\+123' + ceph osd pool set real-tier target_max_bytes 123456 + ceph osd pool get real-tier target_max_bytes | \ + grep 'target_max_bytes:[ \t]\+123456' + ceph osd pool set real-tier cache_target_dirty_ratio .123 + ceph osd pool get real-tier cache_target_dirty_ratio | \ + grep 'cache_target_dirty_ratio:[ \t]\+0.123' + expect_false ceph osd pool set real-tier cache_target_dirty_ratio -.2 + expect_false ceph osd pool set real-tier cache_target_dirty_ratio 1.1 + ceph osd pool set real-tier cache_target_dirty_high_ratio .123 + ceph osd pool get real-tier cache_target_dirty_high_ratio | \ + grep 'cache_target_dirty_high_ratio:[ \t]\+0.123' + expect_false ceph osd pool set real-tier cache_target_dirty_high_ratio -.2 + expect_false ceph osd pool set real-tier cache_target_dirty_high_ratio 1.1 + ceph osd pool set real-tier cache_target_full_ratio .123 + ceph osd pool get real-tier cache_target_full_ratio | \ + grep 'cache_target_full_ratio:[ \t]\+0.123' + ceph osd dump -f json-pretty | grep '"cache_target_full_ratio_micro": 123000' + ceph osd pool set real-tier cache_target_full_ratio 1.0 + ceph osd pool set real-tier cache_target_full_ratio 0 + expect_false ceph osd pool set real-tier cache_target_full_ratio 1.1 + ceph osd pool set real-tier cache_min_flush_age 123 + ceph osd pool get real-tier cache_min_flush_age | \ + grep 'cache_min_flush_age:[ \t]\+123' + ceph osd pool set real-tier cache_min_evict_age 234 + ceph osd pool get real-tier cache_min_evict_age | \ + grep 'cache_min_evict_age:[ \t]\+234' + + # iec vs si units + ceph osd pool set real-tier target_max_objects 1K + ceph osd pool get real-tier target_max_objects | grep 1000 + for o in target_max_bytes target_size_bytes compression_max_blob_size compression_min_blob_size csum_max_block csum_min_block; do + ceph osd pool set real-tier $o 1Ki # no i suffix + val=$(ceph osd 
pool get real-tier $o --format=json | jq -c ".$o") + [[ $val == 1024 ]] + ceph osd pool set real-tier $o 1M # with i suffix + val=$(ceph osd pool get real-tier $o --format=json | jq -c ".$o") + [[ $val == 1048576 ]] + done + + # this is not a tier pool + ceph osd pool create fake-tier 2 + ceph osd pool application enable fake-tier rados + wait_for_clean + + expect_false ceph osd pool set fake-tier hit_set_type explicit_hash + expect_false ceph osd pool get fake-tier hit_set_type + expect_false ceph osd pool set fake-tier hit_set_type explicit_object + expect_false ceph osd pool get fake-tier hit_set_type + expect_false ceph osd pool set fake-tier hit_set_type bloom + expect_false ceph osd pool get fake-tier hit_set_type + expect_false ceph osd pool set fake-tier hit_set_type i_dont_exist + expect_false ceph osd pool set fake-tier hit_set_period 123 + expect_false ceph osd pool get fake-tier hit_set_period + expect_false ceph osd pool set fake-tier hit_set_count 12 + expect_false ceph osd pool get fake-tier hit_set_count + expect_false ceph osd pool set fake-tier hit_set_fpp .01 + expect_false ceph osd pool get fake-tier hit_set_fpp + + expect_false ceph osd pool set fake-tier target_max_objects 123 + expect_false ceph osd pool get fake-tier target_max_objects + expect_false ceph osd pool set fake-tier target_max_bytes 123456 + expect_false ceph osd pool get fake-tier target_max_bytes + expect_false ceph osd pool set fake-tier cache_target_dirty_ratio .123 + expect_false ceph osd pool get fake-tier cache_target_dirty_ratio + expect_false ceph osd pool set fake-tier cache_target_dirty_ratio -.2 + expect_false ceph osd pool set fake-tier cache_target_dirty_ratio 1.1 + expect_false ceph osd pool set fake-tier cache_target_dirty_high_ratio .123 + expect_false ceph osd pool get fake-tier cache_target_dirty_high_ratio + expect_false ceph osd pool set fake-tier cache_target_dirty_high_ratio -.2 + expect_false ceph osd pool set fake-tier cache_target_dirty_high_ratio 1.1 + expect_false ceph osd pool set fake-tier cache_target_full_ratio .123 + expect_false ceph osd pool get fake-tier cache_target_full_ratio + expect_false ceph osd pool set fake-tier cache_target_full_ratio 1.0 + expect_false ceph osd pool set fake-tier cache_target_full_ratio 0 + expect_false ceph osd pool set fake-tier cache_target_full_ratio 1.1 + expect_false ceph osd pool set fake-tier cache_min_flush_age 123 + expect_false ceph osd pool get fake-tier cache_min_flush_age + expect_false ceph osd pool set fake-tier cache_min_evict_age 234 + expect_false ceph osd pool get fake-tier cache_min_evict_age + + ceph osd tier remove rbd real-tier + ceph osd pool delete real-tier real-tier --yes-i-really-really-mean-it + ceph osd pool delete fake-tier fake-tier --yes-i-really-really-mean-it +} + +function test_mon_osd_erasure_code() +{ + + ceph osd erasure-code-profile set fooprofile a=b c=d + ceph osd erasure-code-profile set fooprofile a=b c=d + expect_false ceph osd erasure-code-profile set fooprofile a=b c=d e=f + ceph osd erasure-code-profile set fooprofile a=b c=d e=f --force + ceph osd erasure-code-profile set fooprofile a=b c=d e=f + expect_false ceph osd erasure-code-profile set fooprofile a=b c=d e=f g=h + # make sure rule-foo doesn't work anymore + expect_false ceph osd erasure-code-profile set barprofile ruleset-failure-domain=host + ceph osd erasure-code-profile set barprofile crush-failure-domain=host + # clean up + ceph osd erasure-code-profile rm fooprofile + ceph osd erasure-code-profile rm barprofile + + # try weird k 
and m values + expect_false ceph osd erasure-code-profile set badk k=1 m=1 + expect_false ceph osd erasure-code-profile set badk k=1 m=2 + expect_false ceph osd erasure-code-profile set badk k=0 m=2 + expect_false ceph osd erasure-code-profile set badk k=-1 m=2 + expect_false ceph osd erasure-code-profile set badm k=2 m=0 + expect_false ceph osd erasure-code-profile set badm k=2 m=-1 + ceph osd erasure-code-profile set good k=2 m=1 + ceph osd erasure-code-profile rm good +} + +function test_mon_osd_misc() +{ + set +e + + # expect error about missing 'pool' argument + ceph osd map 2>$TMPFILE; check_response 'pool' $? 22 + + # expect error about unused argument foo + ceph osd ls foo 2>$TMPFILE; check_response 'unused' $? 22 + + # expect "not in range" for invalid overload percentage + ceph osd reweight-by-utilization 80 2>$TMPFILE; check_response 'higher than 100' $? 22 + + set -e + + local old_bytes_per_osd=$(ceph config get mgr mon_reweight_min_bytes_per_osd) + local old_pgs_per_osd=$(ceph config get mgr mon_reweight_min_pgs_per_osd) + # otherwise ceph-mgr complains like: + # Error EDOM: Refusing to reweight: we only have 5372 kb used across all osds! + # Error EDOM: Refusing to reweight: we only have 20 PGs across 3 osds! + ceph config set mgr mon_reweight_min_bytes_per_osd 0 + ceph config set mgr mon_reweight_min_pgs_per_osd 0 + ceph osd reweight-by-utilization 110 + ceph osd reweight-by-utilization 110 .5 + expect_false ceph osd reweight-by-utilization 110 0 + expect_false ceph osd reweight-by-utilization 110 -0.1 + ceph osd test-reweight-by-utilization 110 .5 --no-increasing + ceph osd test-reweight-by-utilization 110 .5 4 --no-increasing + expect_false ceph osd test-reweight-by-utilization 110 .5 0 --no-increasing + expect_false ceph osd test-reweight-by-utilization 110 .5 -10 --no-increasing + ceph osd reweight-by-pg 110 + ceph osd test-reweight-by-pg 110 .5 + ceph osd reweight-by-pg 110 rbd + ceph osd reweight-by-pg 110 .5 rbd + expect_false ceph osd reweight-by-pg 110 boguspoolasdfasdfasdf + # restore the setting + ceph config set mgr mon_reweight_min_bytes_per_osd $old_bytes_per_osd + ceph config set mgr mon_reweight_min_pgs_per_osd $old_pgs_per_osd +} + +function test_admin_heap_profiler() +{ + do_test=1 + set +e + # expect 'heap' commands to be correctly parsed + ceph tell osd.0 heap stats 2>$TMPFILE + if [[ $? 
-eq 22 && `grep 'tcmalloc not enabled' $TMPFILE` ]]; then + echo "tcmalloc not enabled; skip heap profiler test" + do_test=0 + fi + set -e + + [[ $do_test -eq 0 ]] && return 0 + + $SUDO ceph tell osd.0 heap start_profiler + $SUDO ceph tell osd.0 heap dump + $SUDO ceph tell osd.0 heap stop_profiler + $SUDO ceph tell osd.0 heap release +} + +function test_osd_bench() +{ + # test osd bench limits + # As we should not rely on defaults (as they may change over time), + # lets inject some values and perform some simple tests + # max iops: 10 # 100 IOPS + # max throughput: 10485760 # 10MB/s + # max block size: 2097152 # 2MB + # duration: 10 # 10 seconds + + local args="\ + --osd-bench-duration 10 \ + --osd-bench-max-block-size 2097152 \ + --osd-bench-large-size-max-throughput 10485760 \ + --osd-bench-small-size-max-iops 10" + ceph tell osd.0 injectargs ${args## } + + # anything with a bs larger than 2097152 must fail + expect_false ceph tell osd.0 bench 1 2097153 + # but using 'osd_bench_max_bs' must succeed + ceph tell osd.0 bench 1 2097152 + + # we assume 1MB as a large bs; anything lower is a small bs + # for a 4096 bytes bs, for 10 seconds, we are limited by IOPS + # max count: 409600 (bytes) + + # more than max count must not be allowed + expect_false ceph tell osd.0 bench 409601 4096 + # but 409600 must be succeed + ceph tell osd.0 bench 409600 4096 + + # for a large bs, we are limited by throughput. + # for a 2MB block size for 10 seconds, assuming 10MB/s throughput, + # the max count will be (10MB * 10s) = 100MB + # max count: 104857600 (bytes) + + # more than max count must not be allowed + expect_false ceph tell osd.0 bench 104857601 2097152 + # up to max count must be allowed + ceph tell osd.0 bench 104857600 2097152 +} + +function test_osd_negative_filestore_merge_threshold() +{ + $SUDO ceph daemon osd.0 config set filestore_merge_threshold -1 + expect_config_value "osd.0" "filestore_merge_threshold" -1 +} + +function test_mon_tell() +{ + for m in mon.a mon.b; do + ceph tell $m sessions + ceph_watch_start debug audit + ceph tell mon.a sessions + ceph_watch_wait "${m} \[DBG\] from.*cmd='sessions' args=\[\]: dispatch" + done + expect_false ceph tell mon.foo version +} + +function test_mon_ping() +{ + ceph ping mon.a + ceph ping mon.b + expect_false ceph ping mon.foo + + ceph ping mon.\* +} + +function test_mon_deprecated_commands() +{ + # current DEPRECATED commands are marked with FLAG(DEPRECATED) + # + # Testing should be accomplished by setting + # 'mon_debug_deprecated_as_obsolete = true' and expecting ENOTSUP for + # each one of these commands. + + ceph tell mon.* injectargs '--mon-debug-deprecated-as-obsolete' + expect_false ceph config-key list 2> $TMPFILE + check_response "\(EOPNOTSUPP\|ENOTSUP\): command is obsolete" + + ceph tell mon.* injectargs '--no-mon-debug-deprecated-as-obsolete' +} + +function test_mon_cephdf_commands() +{ + # ceph df detail: + # pool section: + # RAW USED The near raw used per pool in raw total + + ceph osd pool create cephdf_for_test 1 1 replicated + ceph osd pool application enable cephdf_for_test rados + ceph osd pool set cephdf_for_test size 2 + + dd if=/dev/zero of=./cephdf_for_test bs=4k count=1 + rados put cephdf_for_test cephdf_for_test -p cephdf_for_test + + #wait for update + for i in `seq 1 10`; do + rados -p cephdf_for_test ls - | grep -q cephdf_for_test && break + sleep 1 + done + # "rados ls" goes straight to osd, but "ceph df" is served by mon. 
so we need + # to sync mon with osd + flush_pg_stats + local jq_filter='.pools | .[] | select(.name == "cephdf_for_test") | .stats' + stored=`ceph df detail --format=json | jq "$jq_filter.stored * 2"` + stored_raw=`ceph df detail --format=json | jq "$jq_filter.stored_raw"` + + ceph osd pool delete cephdf_for_test cephdf_for_test --yes-i-really-really-mean-it + rm ./cephdf_for_test + + expect_false test $stored != $stored_raw +} + +function test_mon_pool_application() +{ + ceph osd pool create app_for_test 16 + + ceph osd pool application enable app_for_test rbd + expect_false ceph osd pool application enable app_for_test rgw + ceph osd pool application enable app_for_test rgw --yes-i-really-mean-it + ceph osd pool ls detail | grep "application rbd,rgw" + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{},"rgw":{}}' + + expect_false ceph osd pool application set app_for_test cephfs key value + ceph osd pool application set app_for_test rbd key1 value1 + ceph osd pool application set app_for_test rbd key2 value2 + ceph osd pool application set app_for_test rgw key1 value1 + ceph osd pool application get app_for_test rbd key1 | grep 'value1' + ceph osd pool application get app_for_test rbd key2 | grep 'value2' + ceph osd pool application get app_for_test rgw key1 | grep 'value1' + + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{"key1":"value1","key2":"value2"},"rgw":{"key1":"value1"}}' + + ceph osd pool application rm app_for_test rgw key1 + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{"key1":"value1","key2":"value2"},"rgw":{}}' + ceph osd pool application rm app_for_test rbd key2 + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{"key1":"value1"},"rgw":{}}' + ceph osd pool application rm app_for_test rbd key1 + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{},"rgw":{}}' + ceph osd pool application rm app_for_test rbd key1 # should be idempotent + + expect_false ceph osd pool application disable app_for_test rgw + ceph osd pool application disable app_for_test rgw --yes-i-really-mean-it + ceph osd pool application disable app_for_test rgw --yes-i-really-mean-it # should be idempotent + ceph osd pool ls detail | grep "application rbd" + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{}}' + + ceph osd pool application disable app_for_test rgw --yes-i-really-mean-it + ceph osd pool ls detail | grep -v "application " + ceph osd pool ls detail --format=json | grep '"application_metadata":{}' + + ceph osd pool rm app_for_test app_for_test --yes-i-really-really-mean-it +} + +function test_mon_tell_help_command() +{ + ceph tell mon.a help | grep sync_force + ceph tell mon.a -h | grep sync_force + ceph tell mon.a config -h | grep 'config diff get' + + # wrong target + expect_false ceph tell mon.zzz help +} + +function test_mon_stdin_stdout() +{ + echo foo | ceph config-key set test_key -i - + ceph config-key get test_key -o - | grep -c foo | grep -q 1 +} + +function test_osd_tell_help_command() +{ + ceph tell osd.1 help + expect_false ceph tell osd.100 help +} + +function test_osd_compact() +{ + ceph tell osd.1 compact + $SUDO ceph daemon osd.1 compact +} + +function test_mds_tell_help_command() +{ + local FS_NAME=cephfs + if ! 
mds_exists ; then + echo "Skipping test, no MDS found" + return + fi + + remove_all_fs + ceph osd pool create fs_data 16 + ceph osd pool create fs_metadata 16 + ceph fs new $FS_NAME fs_metadata fs_data + wait_mds_active $FS_NAME + + + ceph tell mds.a help + expect_false ceph tell mds.z help + + remove_all_fs + ceph osd pool delete fs_data fs_data --yes-i-really-really-mean-it + ceph osd pool delete fs_metadata fs_metadata --yes-i-really-really-mean-it +} + +function test_mgr_tell() +{ + ceph tell mgr version +} + +function test_mgr_devices() +{ + ceph device ls + expect_false ceph device info doesnotexist + expect_false ceph device get-health-metrics doesnotexist +} + +function test_per_pool_scrub_status() +{ + ceph osd pool create noscrub_pool 16 + ceph osd pool create noscrub_pool2 16 + ceph -s | expect_false grep -q "Some pool(s) have the.*scrub.* flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_false grep -q "Some pool(s) have the.*scrub.* flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | + expect_false grep -q "Pool .* has .*scrub.* flag" + ceph health detail | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_false grep -q "Pool .* has .*scrub.* flag" + + ceph osd pool set noscrub_pool noscrub 1 + ceph -s | expect_true grep -q "Some pool(s) have the noscrub flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_true grep -q "Some pool(s) have the noscrub flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has noscrub flag" + + ceph osd pool set noscrub_pool nodeep-scrub 1 + ceph osd pool set noscrub_pool2 nodeep-scrub 1 + ceph -s | expect_true grep -q "Some pool(s) have the noscrub, nodeep-scrub flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_true grep -q "Some pool(s) have the noscrub, nodeep-scrub flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has nodeep-scrub flag" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool2 has nodeep-scrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has nodeep-scrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool2 has nodeep-scrub flag" + + ceph osd pool rm noscrub_pool noscrub_pool --yes-i-really-really-mean-it + ceph osd pool rm noscrub_pool2 noscrub_pool2 --yes-i-really-really-mean-it +} + +# +# New tests should be added to the TESTS array below +# +# Individual tests may be run using the '-t <testname>' argument +# The user can specify '-t <testname>' as many times as she wants +# +# Tests will be run in order presented in the TESTS array, or in +# the order specified by the '-t <testname>' options. +# +# '-l' will list all the available test names +# '-h' will show usage +# +# The test maintains backward compatibility: not specifying arguments +# will run all tests following the order they appear in the TESTS array. 
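+#
+# Illustrative invocations (editorial examples; the flags are the ones parsed
+# below and the test names are entries of the TESTS array):
+#
+#   $0 -l                            # list the available tests
+#   $0 -t mon_osd_pool -t mon_pg     # run only these two tests, in this order
+#   $0 --test-osd                    # run the whole OSD test group
+#   $0 --no-sanity-check -t mon_pg   # skip the osd-down sanity checks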
+# + +set +x +MON_TESTS+=" mon_injectargs" +MON_TESTS+=" mon_injectargs_SI" +for i in `seq 9`; do + MON_TESTS+=" tiering_$i"; +done +MON_TESTS+=" auth" +MON_TESTS+=" auth_profiles" +MON_TESTS+=" mon_misc" +MON_TESTS+=" mon_mon" +MON_TESTS+=" mon_osd" +MON_TESTS+=" mon_config_key" +MON_TESTS+=" mon_crush" +MON_TESTS+=" mon_osd_create_destroy" +MON_TESTS+=" mon_osd_pool" +MON_TESTS+=" mon_osd_pool_quota" +MON_TESTS+=" mon_pg" +MON_TESTS+=" mon_osd_pool_set" +MON_TESTS+=" mon_osd_tiered_pool_set" +MON_TESTS+=" mon_osd_erasure_code" +MON_TESTS+=" mon_osd_misc" +MON_TESTS+=" mon_tell" +MON_TESTS+=" mon_ping" +MON_TESTS+=" mon_deprecated_commands" +MON_TESTS+=" mon_caps" +MON_TESTS+=" mon_cephdf_commands" +MON_TESTS+=" mon_tell_help_command" +MON_TESTS+=" mon_stdin_stdout" + +OSD_TESTS+=" osd_bench" +OSD_TESTS+=" osd_negative_filestore_merge_threshold" +OSD_TESTS+=" tiering_agent" +OSD_TESTS+=" admin_heap_profiler" +OSD_TESTS+=" osd_tell_help_command" +OSD_TESTS+=" osd_compact" +OSD_TESTS+=" per_pool_scrub_status" + +MDS_TESTS+=" mds_tell" +MDS_TESTS+=" mon_mds" +MDS_TESTS+=" mon_mds_metadata" +MDS_TESTS+=" mds_tell_help_command" + +MGR_TESTS+=" mgr_tell" +MGR_TESTS+=" mgr_devices" + +TESTS+=$MON_TESTS +TESTS+=$OSD_TESTS +TESTS+=$MDS_TESTS +TESTS+=$MGR_TESTS + +# +# "main" follows +# + +function list_tests() +{ + echo "AVAILABLE TESTS" + for i in $TESTS; do + echo " $i" + done +} + +function usage() +{ + echo "usage: $0 [-h|-l|-t <testname> [-t <testname>...]]" +} + +tests_to_run=() + +sanity_check=true + +while [[ $# -gt 0 ]]; do + opt=$1 + + case "$opt" in + "-l" ) + do_list=1 + ;; + "--asok-does-not-need-root" ) + SUDO="" + ;; + "--no-sanity-check" ) + sanity_check=false + ;; + "--test-mon" ) + tests_to_run+="$MON_TESTS" + ;; + "--test-osd" ) + tests_to_run+="$OSD_TESTS" + ;; + "--test-mds" ) + tests_to_run+="$MDS_TESTS" + ;; + "--test-mgr" ) + tests_to_run+="$MGR_TESTS" + ;; + "-t" ) + shift + if [[ -z "$1" ]]; then + echo "missing argument to '-t'" + usage ; + exit 1 + fi + tests_to_run+=" $1" + ;; + "-h" ) + usage ; + exit 0 + ;; + esac + shift +done + +if [[ $do_list -eq 1 ]]; then + list_tests ; + exit 0 +fi + +ceph osd pool create rbd 16 + +if test -z "$tests_to_run" ; then + tests_to_run="$TESTS" +fi + +if $sanity_check ; then + wait_no_osd_down +fi +for i in $tests_to_run; do + if $sanity_check ; then + check_no_osd_down + fi + set -x + test_${i} + set +x +done +if $sanity_check ; then + check_no_osd_down +fi + +set -x + +echo OK diff --git a/qa/workunits/cephtool/test_daemon.sh b/qa/workunits/cephtool/test_daemon.sh new file mode 100755 index 000000000..08ae937cc --- /dev/null +++ b/qa/workunits/cephtool/test_daemon.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +set -ex + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +echo note: assuming mon.a is on the current host + +# can set to 'sudo ./ceph' to execute tests from current dir for development +CEPH=${CEPH:-'sudo ceph'} + +${CEPH} daemon mon.a version | grep version + +# get debug_ms setting and strip it, painfully for reuse +old_ms=$(${CEPH} daemon mon.a config get debug_ms | \ + grep debug_ms | sed -e 's/.*: //' -e 's/["\}\\]//g') +${CEPH} daemon mon.a config set debug_ms 13 +new_ms=$(${CEPH} daemon mon.a config get debug_ms | \ + grep debug_ms | sed -e 's/.*: //' -e 's/["\}\\]//g') +[ "$new_ms" = "13/13" ] +${CEPH} daemon mon.a config set debug_ms $old_ms +new_ms=$(${CEPH} daemon mon.a config get debug_ms | \ + grep debug_ms | sed -e 's/.*: //' -e 's/["\}\\]//g') +[ "$new_ms" = "$old_ms" ] + +# 
unregistered/non-existent command +expect_false ${CEPH} daemon mon.a bogus_command_blah foo + +set +e +OUTPUT=$(${CEPH} -c /not/a/ceph.conf daemon mon.a help 2>&1) +# look for EINVAL +if [ $? != 22 ] ; then exit 1; fi +if ! echo "$OUTPUT" | grep -q '.*open.*/not/a/ceph.conf'; then + echo "didn't find expected error in bad conf search" + exit 1 +fi +set -e + +echo OK diff --git a/qa/workunits/cephtool/test_kvstore_tool.sh b/qa/workunits/cephtool/test_kvstore_tool.sh new file mode 100755 index 000000000..b7953dd21 --- /dev/null +++ b/qa/workunits/cephtool/test_kvstore_tool.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +set -x + +source $(dirname $0)/../../standalone/ceph-helpers.sh + +set -e +set -o functrace +PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' +SUDO=${SUDO:-sudo} +export CEPH_DEV=1 + +echo note: test ceph_kvstore_tool with bluestore + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +TEMP_DIR=$(mktemp -d ./cephtool.XXX) +trap "rm -fr $TEMP_DIR" 0 + +TEMP_FILE=$(mktemp $TEMP_DIR/test_invalid.XXX) + +function test_ceph_kvstore_tool() +{ + # create a data directory + ceph-objectstore-tool --data-path ${TEMP_DIR} --op mkfs --no-mon-config + + # list + origin_kv_nums=`ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list 2>/dev/null | wc -l` + + # exists + prefix=`ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list 2>/dev/null | head -n 1 | awk '{print $1}'` + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists ${prefix} + expect_false ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists ${prefix}notexist + + # list-crc + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list-crc + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list-crc ${prefix} + + # list with prefix + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list ${prefix} + + # set + echo "helloworld" >> ${TEMP_FILE} + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} set TESTPREFIX TESTKEY in ${TEMP_FILE} + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists TESTPREFIX TESTKEY + + # get + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} get TESTPREFIX TESTKEY out ${TEMP_FILE}.bak + diff ${TEMP_FILE} ${TEMP_FILE}.bak + + # rm + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} rm TESTPREFIX TESTKEY + expect_false ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists TESTPREFIX TESTKEY + + # compact + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} compact + + # destructive-repair + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} destructive-repair + + current_kv_nums=`ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list 2>/dev/null | wc -l` + test ${origin_kv_nums} -eq ${current_kv_nums} +} + +test_ceph_kvstore_tool + +echo OK diff --git a/qa/workunits/client/test.sh b/qa/workunits/client/test.sh new file mode 100755 index 000000000..12abd3a5d --- /dev/null +++ b/qa/workunits/client/test.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +set -ex + +ceph_test_client diff --git a/qa/workunits/cls/test_cls_2pc_queue.sh b/qa/workunits/cls/test_cls_2pc_queue.sh new file mode 100755 index 000000000..b4f68800f --- /dev/null +++ b/qa/workunits/cls/test_cls_2pc_queue.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_2pc_queue + +exit 0 diff --git a/qa/workunits/cls/test_cls_cas.sh b/qa/workunits/cls/test_cls_cas.sh new file mode 100755 index 000000000..765913482 --- /dev/null +++ b/qa/workunits/cls/test_cls_cas.sh @@ -0,0 +1,6 @@ +#!/bin/sh -e + +GTEST_FILTER=${CLS_CAS_GTEST_FILTER:-*} +ceph_test_cls_cas --gtest_filter=${GTEST_FILTER} + +exit 0 diff --git a/qa/workunits/cls/test_cls_cmpomap.sh b/qa/workunits/cls/test_cls_cmpomap.sh new file mode 100755 index 000000000..af079f6e6 --- /dev/null 
+++ b/qa/workunits/cls/test_cls_cmpomap.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_cmpomap + +exit 0 diff --git a/qa/workunits/cls/test_cls_hello.sh b/qa/workunits/cls/test_cls_hello.sh new file mode 100755 index 000000000..0a2e09620 --- /dev/null +++ b/qa/workunits/cls/test_cls_hello.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_hello + +exit 0 diff --git a/qa/workunits/cls/test_cls_journal.sh b/qa/workunits/cls/test_cls_journal.sh new file mode 100755 index 000000000..9aa7450a9 --- /dev/null +++ b/qa/workunits/cls/test_cls_journal.sh @@ -0,0 +1,6 @@ +#!/bin/sh -e + +GTEST_FILTER=${CLS_JOURNAL_GTEST_FILTER:-*} +ceph_test_cls_journal --gtest_filter=${GTEST_FILTER} + +exit 0 diff --git a/qa/workunits/cls/test_cls_lock.sh b/qa/workunits/cls/test_cls_lock.sh new file mode 100755 index 000000000..c14527053 --- /dev/null +++ b/qa/workunits/cls/test_cls_lock.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_lock + +exit 0 diff --git a/qa/workunits/cls/test_cls_log.sh b/qa/workunits/cls/test_cls_log.sh new file mode 100755 index 000000000..523f985e7 --- /dev/null +++ b/qa/workunits/cls/test_cls_log.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_log + +exit 0 diff --git a/qa/workunits/cls/test_cls_numops.sh b/qa/workunits/cls/test_cls_numops.sh new file mode 100755 index 000000000..dcbafcab2 --- /dev/null +++ b/qa/workunits/cls/test_cls_numops.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_numops + +exit 0 diff --git a/qa/workunits/cls/test_cls_rbd.sh b/qa/workunits/cls/test_cls_rbd.sh new file mode 100755 index 000000000..fd4bec0f8 --- /dev/null +++ b/qa/workunits/cls/test_cls_rbd.sh @@ -0,0 +1,6 @@ +#!/bin/sh -e + +GTEST_FILTER=${CLS_RBD_GTEST_FILTER:-*} +ceph_test_cls_rbd --gtest_filter=${GTEST_FILTER} + +exit 0 diff --git a/qa/workunits/cls/test_cls_refcount.sh b/qa/workunits/cls/test_cls_refcount.sh new file mode 100755 index 000000000..d722f5ad9 --- /dev/null +++ b/qa/workunits/cls/test_cls_refcount.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_refcount + +exit 0 diff --git a/qa/workunits/cls/test_cls_rgw.sh b/qa/workunits/cls/test_cls_rgw.sh new file mode 100755 index 000000000..257338a05 --- /dev/null +++ b/qa/workunits/cls/test_cls_rgw.sh @@ -0,0 +1,8 @@ +#!/bin/sh -e + +ceph_test_cls_rgw +#ceph_test_cls_rgw_meta +#ceph_test_cls_rgw_log +#ceph_test_cls_rgw_opstate + +exit 0 diff --git a/qa/workunits/cls/test_cls_rgw_gc.sh b/qa/workunits/cls/test_cls_rgw_gc.sh new file mode 100755 index 000000000..0266438f8 --- /dev/null +++ b/qa/workunits/cls/test_cls_rgw_gc.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_rgw_gc + +exit 0 diff --git a/qa/workunits/cls/test_cls_rgw_stats.sh b/qa/workunits/cls/test_cls_rgw_stats.sh new file mode 100755 index 000000000..e1b5bd6b9 --- /dev/null +++ b/qa/workunits/cls/test_cls_rgw_stats.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_rgw_stats + +exit 0 diff --git a/qa/workunits/cls/test_cls_sdk.sh b/qa/workunits/cls/test_cls_sdk.sh new file mode 100755 index 000000000..f1ccdc3b4 --- /dev/null +++ b/qa/workunits/cls/test_cls_sdk.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_sdk + +exit 0 diff --git a/qa/workunits/direct_io/.gitignore b/qa/workunits/direct_io/.gitignore new file mode 100644 index 000000000..80f1fd1aa --- /dev/null +++ b/qa/workunits/direct_io/.gitignore @@ -0,0 +1,3 @@ +/direct_io_test +/test_sync_io +/test_short_dio_read diff --git a/qa/workunits/direct_io/Makefile b/qa/workunits/direct_io/Makefile new file mode 100644 index 000000000..20fec0be5 --- /dev/null +++ b/qa/workunits/direct_io/Makefile @@ -0,0 +1,11 @@ 
+CFLAGS = -Wall -Wextra -D_GNU_SOURCE + +TARGETS = direct_io_test test_sync_io test_short_dio_read + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git a/qa/workunits/direct_io/big.sh b/qa/workunits/direct_io/big.sh new file mode 100755 index 000000000..43bd6d72b --- /dev/null +++ b/qa/workunits/direct_io/big.sh @@ -0,0 +1,6 @@ +#!/bin/sh -ex + +echo "test large (16MB) dio write" +dd if=/dev/zero of=foo.big bs=16M count=1 oflag=direct + +echo OK diff --git a/qa/workunits/direct_io/direct_io_test.c b/qa/workunits/direct_io/direct_io_test.c new file mode 100644 index 000000000..ccfbbb860 --- /dev/null +++ b/qa/workunits/direct_io/direct_io_test.c @@ -0,0 +1,312 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <errno.h> +#include <inttypes.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +/* + * direct_io_test + * + * This test does some I/O using O_DIRECT. + * + * Semantics of O_DIRECT can be found at http://lwn.net/Articles/348739/ + * + */ + +static int g_num_pages = 100; + +static int g_duration = 10; + +struct chunk { + uint64_t offset; + uint64_t pad0; + uint64_t pad1; + uint64_t pad2; + uint64_t pad3; + uint64_t pad4; + uint64_t pad5; + uint64_t not_offset; +} __attribute__((packed)); + +static int page_size; + +static char temp_file[] = "direct_io_temp_file_XXXXXX"; + +static int safe_write(int fd, const void *buf, signed int len) +{ + const char *b = (const char*)buf; + /* Handle EINTR and short writes */ + while (1) { + int res = write(fd, b, len); + if (res < 0) { + int err = errno; + if (err != EINTR) { + return err; + } + } + len -= res; + b += res; + if (len <= 0) + return 0; + } +} + +static int do_read(int fd, char *buf, int buf_sz) +{ + /* We assume no short reads or EINTR. It's not really clear how + * those things interact with O_DIRECT. 
*/ + int ret = read(fd, buf, buf_sz); + if (ret < 0) { + int err = errno; + printf("do_read: error: %d (%s)\n", err, strerror(err)); + return err; + } + if (ret != buf_sz) { + printf("do_read: short read\n"); + return -EIO; + } + return 0; +} + +static int setup_temp_file(void) +{ + int fd; + int64_t num_chunks, i; + + if (page_size % sizeof(struct chunk)) { + printf("setup_big_file: page_size doesn't divide evenly " + "into data blocks.\n"); + return -EINVAL; + } + + fd = mkstemp(temp_file); + if (fd < 0) { + int err = errno; + printf("setup_big_file: mkostemps failed with error %d\n", err); + return err; + } + + num_chunks = g_num_pages * (page_size / sizeof(struct chunk)); + for (i = 0; i < num_chunks; ++i) { + int ret; + struct chunk c; + memset(&c, 0, sizeof(c)); + c.offset = i * sizeof(struct chunk); + c.pad0 = 0; + c.pad1 = 1; + c.pad2 = 2; + c.pad3 = 3; + c.pad4 = 4; + c.pad5 = 5; + c.not_offset = ~c.offset; + ret = safe_write(fd, &c, sizeof(struct chunk)); + if (ret) { + printf("setup_big_file: safe_write failed with " + "error: %d\n", ret); + TEMP_FAILURE_RETRY(close(fd)); + unlink(temp_file); + return ret; + } + } + TEMP_FAILURE_RETRY(close(fd)); + return 0; +} + +static int verify_chunk(const struct chunk *c, uint64_t offset) +{ + if (c->offset != offset) { + printf("verify_chunk(%" PRId64 "): bad offset value (got: %" + PRId64 ", expected: %" PRId64 "\n", offset, c->offset, offset); + return EIO; + } + if (c->pad0 != 0) { + printf("verify_chunk(%" PRId64 "): bad pad0 value\n", offset); + return EIO; + } + if (c->pad1 != 1) { + printf("verify_chunk(%" PRId64 "): bad pad1 value\n", offset); + return EIO; + } + if (c->pad2 != 2) { + printf("verify_chunk(%" PRId64 "): bad pad2 value\n", offset); + return EIO; + } + if (c->pad3 != 3) { + printf("verify_chunk(%" PRId64 "): bad pad3 value\n", offset); + return EIO; + } + if (c->pad4 != 4) { + printf("verify_chunk(%" PRId64 "): bad pad4 value\n", offset); + return EIO; + } + if (c->pad5 != 5) { + printf("verify_chunk(%" PRId64 "): bad pad5 value\n", offset); + return EIO; + } + if (c->not_offset != ~offset) { + printf("verify_chunk(%" PRId64 "): bad not_offset value\n", + offset); + return EIO; + } + return 0; +} + +static int do_o_direct_reads(void) +{ + int fd, ret; + unsigned int i; + void *buf = 0; + time_t cur_time, end_time; + ret = posix_memalign(&buf, page_size, page_size); + if (ret) { + printf("do_o_direct_reads: posix_memalign returned %d\n", ret); + goto done; + } + + fd = open(temp_file, O_RDONLY | O_DIRECT); + if (fd < 0) { + ret = errno; + printf("do_o_direct_reads: error opening fd: %d\n", ret); + goto free_buf; + } + + // read the first chunk and see if it looks OK + ret = do_read(fd, buf, page_size); + if (ret) + goto close_fd; + ret = verify_chunk((struct chunk*)buf, 0); + if (ret) + goto close_fd; + + // read some random chunks and see how they look + cur_time = time(NULL); + end_time = cur_time + g_duration; + i = 0; + do { + time_t next_time; + uint64_t offset; + int page; + unsigned int seed; + + seed = i++; + page = rand_r(&seed) % g_num_pages; + offset = page; + offset *= page_size; + if (lseek64(fd, offset, SEEK_SET) == -1) { + int err = errno; + printf("lseek64(%" PRId64 ") failed: error %d (%s)\n", + offset, err, strerror(err)); + goto close_fd; + } + ret = do_read(fd, buf, page_size); + if (ret) + goto close_fd; + ret = verify_chunk((struct chunk*)buf, offset); + if (ret) + goto close_fd; + next_time = time(NULL); + if (next_time > cur_time) { + printf("."); + } + cur_time = next_time; + } while 
(time(NULL) < end_time); + + printf("\ndo_o_direct_reads: SUCCESS\n"); +close_fd: + TEMP_FAILURE_RETRY(close(fd)); +free_buf: + free(buf); +done: + return ret; +} + +static void usage(char *argv0) +{ + printf("%s: tests direct I/O\n", argv0); + printf("-d <seconds>: sets duration to <seconds>\n"); + printf("-h: this help\n"); + printf("-p <pages>: sets number of pages to allocate\n"); +} + +static void parse_args(int argc, char *argv[]) +{ + int c; + while ((c = getopt (argc, argv, "d:hp:")) != -1) { + switch (c) { + case 'd': + g_duration = atoi(optarg); + if (g_duration <= 0) { + printf("tried to set invalid value of " + "g_duration: %d\n", g_num_pages); + exit(1); + } + break; + case 'h': + usage(argv[0]); + exit(0); + break; + case 'p': + g_num_pages = atoi(optarg); + if (g_num_pages <= 0) { + printf("tried to set invalid value of " + "g_num_pages: %d\n", g_num_pages); + exit(1); + } + break; + case '?': + usage(argv[0]); + exit(1); + break; + default: + usage(argv[0]); + exit(1); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + int ret; + + parse_args(argc, argv); + + setvbuf(stdout, NULL, _IONBF, 0); + + page_size = getpagesize(); + + ret = setup_temp_file(); + if (ret) { + printf("setup_temp_file failed with error %d\n", ret); + goto done; + } + + ret = do_o_direct_reads(); + if (ret) { + printf("do_o_direct_reads failed with error %d\n", ret); + goto unlink_temp_file; + } + +unlink_temp_file: + unlink(temp_file); +done: + return ret; +} diff --git a/qa/workunits/direct_io/misc.sh b/qa/workunits/direct_io/misc.sh new file mode 100755 index 000000000..6de080d2d --- /dev/null +++ b/qa/workunits/direct_io/misc.sh @@ -0,0 +1,16 @@ +#!/bin/sh -ex + +# a few test cases from henry +echo "test read from hole" +dd if=/dev/zero of=dd3 bs=1 seek=1048576 count=0 +dd if=dd3 of=/tmp/ddout1 skip=8 bs=512 count=2 iflag=direct +dd if=/dev/zero of=/tmp/dd3 bs=512 count=2 +cmp /tmp/dd3 /tmp/ddout1 + +echo "other thing" +dd if=/dev/urandom of=/tmp/dd10 bs=500 count=1 +dd if=/tmp/dd10 of=dd10 bs=512 seek=8388 count=1 +dd if=dd10 of=/tmp/dd10out bs=512 skip=8388 count=1 iflag=direct +cmp /tmp/dd10 /tmp/dd10out + +echo OK diff --git a/qa/workunits/direct_io/test_short_dio_read.c b/qa/workunits/direct_io/test_short_dio_read.c new file mode 100644 index 000000000..502485557 --- /dev/null +++ b/qa/workunits/direct_io/test_short_dio_read.c @@ -0,0 +1,57 @@ +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +int main() +{ + char buf[409600]; + ssize_t r; + int err; + int fd = open("shortfile", O_WRONLY|O_CREAT, 0644); + + if (fd < 0) { + err = errno; + printf("error: open() failed with: %d (%s)\n", err, strerror(err)); + exit(err); + } + + printf("writing first 3 bytes of 10k file\n"); + r = write(fd, "foo", 3); + if (r == -1) { + err = errno; + printf("error: write() failed with: %d (%s)\n", err, strerror(err)); + close(fd); + exit(err); + } + r = ftruncate(fd, 10000); + if (r == -1) { + err = errno; + printf("error: ftruncate() failed with: %d (%s)\n", err, strerror(err)); + close(fd); + exit(err); + } + + fsync(fd); + close(fd); + + printf("reading O_DIRECT\n"); + fd = open("shortfile", O_RDONLY|O_DIRECT); + if (fd < 0) { + err = errno; + printf("error: open() failed with: %d (%s)\n", err, strerror(err)); + exit(err); + } + + r = read(fd, buf, sizeof(buf)); + close(fd); + + printf("got %d\n", (int)r); + if (r != 10000) + return 1; + return 0; +} diff --git 
a/qa/workunits/direct_io/test_sync_io.c b/qa/workunits/direct_io/test_sync_io.c new file mode 100644 index 000000000..f393fa6e8 --- /dev/null +++ b/qa/workunits/direct_io/test_sync_io.c @@ -0,0 +1,250 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <inttypes.h> +#include <linux/types.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <errno.h> + +//#include "../client/ioctl.h" + +#include <linux/ioctl.h> +#define CEPH_IOCTL_MAGIC 0x97 +#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5) + +void write_pattern() +{ + printf("writing pattern\n"); + + uint64_t i; + int r; + + int fd = open("foo", O_CREAT|O_WRONLY, 0644); + if (fd < 0) { + r = errno; + printf("write_pattern: error: open() failed with: %d (%s)\n", r, strerror(r)); + exit(r); + } + for (i=0; i<1048576 * sizeof(i); i += sizeof(i)) { + r = write(fd, &i, sizeof(i)); + if (r == -1) { + r = errno; + printf("write_pattern: error: write() failed with: %d (%s)\n", r, strerror(r)); + break; + } + } + + close(fd); +} + +int verify_pattern(char *buf, size_t len, uint64_t off) +{ + size_t i; + + for (i = 0; i < len; i += sizeof(uint64_t)) { + uint64_t expected = i + off; + uint64_t actual = *(uint64_t*)(buf + i); + if (expected != actual) { + printf("error: offset %llu had %llu\n", (unsigned long long)expected, + (unsigned long long)actual); + exit(1); + } + } + return 0; +} + +void generate_pattern(void *buf, size_t len, uint64_t offset) +{ + uint64_t *v = buf; + size_t i; + + for (i=0; i<len / sizeof(v); i++) + v[i] = i * sizeof(v) + offset; + verify_pattern(buf, len, offset); +} + +int read_file(int buf_align, uint64_t offset, int len, int direct) { + + printf("read_file buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + void *rawbuf; + int r; + int flags; + int err = 0; + + if(direct) + flags = O_RDONLY|O_DIRECT; + else + flags = O_RDONLY; + + int fd = open("foo", flags); + if (fd < 0) { + err = errno; + printf("read_file: error: open() failed with: %d (%s)\n", err, strerror(err)); + exit(err); + } + + if (!direct) + ioctl(fd, CEPH_IOC_SYNCIO); + + if ((r = posix_memalign(&rawbuf, 4096, len + buf_align)) != 0) { + printf("read_file: error: posix_memalign failed with %d", r); + close(fd); + exit (r); + } + + void *buf = (char *)rawbuf + buf_align; + memset(buf, 0, len); + r = pread(fd, buf, len, offset); + if (r == -1) { + err = errno; + printf("read_file: error: pread() failed with: %d (%s)\n", err, strerror(err)); + goto out; + } + r = verify_pattern(buf, len, offset); + +out: + close(fd); + free(rawbuf); + return r; +} + +int read_direct(int buf_align, uint64_t offset, int len) +{ + printf("read_direct buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return read_file(buf_align, offset, len, 1); +} + +int read_sync(int buf_align, uint64_t offset, int len) +{ + printf("read_sync buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return read_file(buf_align, offset, len, 0); +} + +int write_file(int buf_align, uint64_t offset, int len, int direct) +{ + printf("write_file buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + void *rawbuf; + int r; + int err = 0; + int flags; + if (direct) + flags = O_WRONLY|O_DIRECT|O_CREAT; + else + flags = O_WRONLY|O_CREAT; + + int fd = open("foo", flags, 0644); + if (fd < 0) { + int err = errno; + printf("write_file: error: open() failed with: %d (%s)\n", err, strerror(err)); 
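+		/* nothing has been allocated on this path yet, so it is fine
+		 * to exit directly instead of jumping to the out_* cleanup
+		 * labels used further down in this function */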
+ exit(err); + } + + if ((r = posix_memalign(&rawbuf, 4096, len + buf_align)) != 0) { + printf("write_file: error: posix_memalign failed with %d", r); + err = r; + goto out_close; + } + + if (!direct) + ioctl(fd, CEPH_IOC_SYNCIO); + + void *buf = (char *)rawbuf + buf_align; + + generate_pattern(buf, len, offset); + + r = pwrite(fd, buf, len, offset); + close(fd); + + fd = open("foo", O_RDONLY); + if (fd < 0) { + err = errno; + printf("write_file: error: open() failed with: %d (%s)\n", err, strerror(err)); + free(rawbuf); + goto out_unlink; + } + void *buf2 = malloc(len); + if (!buf2) { + err = -ENOMEM; + printf("write_file: error: malloc failed\n"); + goto out_free; + } + + memset(buf2, 0, len); + r = pread(fd, buf2, len, offset); + if (r == -1) { + err = errno; + printf("write_file: error: pread() failed with: %d (%s)\n", err, strerror(err)); + goto out_free_buf; + } + r = verify_pattern(buf2, len, offset); + +out_free_buf: + free(buf2); +out_free: + free(rawbuf); +out_close: + close(fd); +out_unlink: + unlink("foo"); + if (err) + exit(err); + return r; +} + +int write_direct(int buf_align, uint64_t offset, int len) +{ + printf("write_direct buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return write_file (buf_align, offset, len, 1); +} + +int write_sync(int buf_align, uint64_t offset, int len) +{ + printf("write_sync buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return write_file (buf_align, offset, len, 0); +} + +int main(int argc, char **argv) +{ + uint64_t i, j, k; + int read = 1; + int write = 1; + + if (argc >= 2 && strcmp(argv[1], "read") == 0) + write = 0; + if (argc >= 2 && strcmp(argv[1], "write") == 0) + read = 0; + + if (read) { + write_pattern(); + + for (i = 0; i < 4096; i += 512) + for (j = 4*1024*1024 - 4096; j < 4*1024*1024 + 4096; j += 512) + for (k = 1024; k <= 16384; k *= 2) { + read_direct(i, j, k); + read_sync(i, j, k); + } + + } + unlink("foo"); + if (write) { + for (i = 0; i < 4096; i += 512) + for (j = 4*1024*1024 - 4096 + 512; j < 4*1024*1024 + 4096; j += 512) + for (k = 1024; k <= 16384; k *= 2) { + write_direct(i, j, k); + write_sync(i, j, k); + } + } + + + return 0; +} diff --git a/qa/workunits/erasure-code/.gitignore b/qa/workunits/erasure-code/.gitignore new file mode 100644 index 000000000..7e563b8b3 --- /dev/null +++ b/qa/workunits/erasure-code/.gitignore @@ -0,0 +1,2 @@ +*.log +*.trs diff --git a/qa/workunits/erasure-code/bench.html b/qa/workunits/erasure-code/bench.html new file mode 100644 index 000000000..3b4b6c74c --- /dev/null +++ b/qa/workunits/erasure-code/bench.html @@ -0,0 +1,34 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" > +<html> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> + <title>Erasure Code Plugins Benchmarks</title> + <link href="examples.css" rel="stylesheet" type="text/css"> + <script language="javascript" type="text/javascript" src="jquery.js"></script> + <script language="javascript" type="text/javascript" src="jquery.flot.js"></script> + <script language="javascript" type="text/javascript" src="jquery.flot.categories.js"></script> + <script language="javascript" type="text/javascript" src="bench.js"></script> + <script language="javascript" type="text/javascript" src="plot.js"></script> + </head> + <body> + + <div id="header"> + <h2>Erasure Code Plugins Benchmarks</h2> + </div> + + <div id="content"> + + <div class="demo-container"> + <div id="encode" 
class="demo-placeholder"></div> + </div> + <p>encode: Y = GB/s, X = K/M</p> + + <div class="demo-container"> + <div id="decode" class="demo-placeholder"></div> + </div> + <p>decode: Y = GB/s, X = K/M/erasures</p> + + </div> + + </body> +</html> diff --git a/qa/workunits/erasure-code/bench.sh b/qa/workunits/erasure-code/bench.sh new file mode 100755 index 000000000..8e288f053 --- /dev/null +++ b/qa/workunits/erasure-code/bench.sh @@ -0,0 +1,192 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +# Test that it works from sources with: +# +# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# PLUGIN_DIRECTORY=build/lib \ +# qa/workunits/erasure-code/bench.sh fplot jerasure | +# tee qa/workunits/erasure-code/bench.js +# +# This should start immediately and display: +# +# ... +# [ '2/1', .48035538612887358583 ], +# [ '3/2', .21648470405675016626 ], +# etc. +# +# and complete within a few seconds. The result can then be displayed with: +# +# firefox qa/workunits/erasure-code/bench.html +# +# Once it is confirmed to work, it can be run with a more significant +# volume of data so that the measures are more reliable: +# +# TOTAL_SIZE=$((4 * 1024 * 1024 * 1024)) \ +# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# PLUGIN_DIRECTORY=build/lib \ +# qa/workunits/erasure-code/bench.sh fplot jerasure | +# tee qa/workunits/erasure-code/bench.js +# +set -e + +export PATH=/sbin:$PATH + +: ${VERBOSE:=false} +: ${CEPH_ERASURE_CODE_BENCHMARK:=ceph_erasure_code_benchmark} +: ${PLUGIN_DIRECTORY:=/usr/lib/ceph/erasure-code} +: ${PLUGINS:=isa jerasure} +: ${TECHNIQUES:=vandermonde cauchy} +: ${TOTAL_SIZE:=$((1024 * 1024))} +: ${SIZE:=4096} +: ${PARAMETERS:=--parameter jerasure-per-chunk-alignment=true} + +function bench_header() { + echo -e "seconds\tKB\tplugin\tk\tm\twork.\titer.\tsize\teras.\tcommand." 
+} + +function bench() { + local plugin=$1 + shift + local k=$1 + shift + local m=$1 + shift + local workload=$1 + shift + local iterations=$1 + shift + local size=$1 + shift + local erasures=$1 + shift + command=$(echo $CEPH_ERASURE_CODE_BENCHMARK \ + --plugin $plugin \ + --workload $workload \ + --iterations $iterations \ + --size $size \ + --erasures $erasures \ + --parameter k=$k \ + --parameter m=$m \ + --erasure-code-dir $PLUGIN_DIRECTORY) + result=$($command "$@") + echo -e "$result\t$plugin\t$k\t$m\t$workload\t$iterations\t$size\t$erasures\t$command ""$@" +} + +function packetsize() { + local k=$1 + local w=$2 + local vector_wordsize=$3 + local size=$4 + + local p=$(( ($size / $k / $w / $vector_wordsize ) * $vector_wordsize)) + if [ $p -gt 3100 ] ; then + p=3100 + fi + echo $p +} + +function bench_run() { + local plugin=jerasure + local w=8 + local VECTOR_WORDSIZE=16 + local ks="2 3 4 6 10" + declare -A k2ms + k2ms[2]="1" + k2ms[3]="2" + k2ms[4]="2 3" + k2ms[6]="2 3 4" + k2ms[10]="3 4" + local isa2technique_vandermonde='reed_sol_van' + local isa2technique_cauchy='cauchy' + local jerasure2technique_vandermonde='reed_sol_van' + local jerasure2technique_cauchy='cauchy_good' + for technique in ${TECHNIQUES} ; do + for plugin in ${PLUGINS} ; do + eval technique_parameter=\$${plugin}2technique_${technique} + echo "serie encode_${technique}_${plugin}" + for k in $ks ; do + for m in ${k2ms[$k]} ; do + bench $plugin $k $m encode $(($TOTAL_SIZE / $SIZE)) $SIZE 0 \ + --parameter packetsize=$(packetsize $k $w $VECTOR_WORDSIZE $SIZE) \ + ${PARAMETERS} \ + --parameter technique=$technique_parameter + + done + done + done + done + for technique in ${TECHNIQUES} ; do + for plugin in ${PLUGINS} ; do + eval technique_parameter=\$${plugin}2technique_${technique} + echo "serie decode_${technique}_${plugin}" + for k in $ks ; do + for m in ${k2ms[$k]} ; do + echo + for erasures in $(seq 1 $m) ; do + bench $plugin $k $m decode $(($TOTAL_SIZE / $SIZE)) $SIZE $erasures \ + --parameter packetsize=$(packetsize $k $w $VECTOR_WORDSIZE $SIZE) \ + ${PARAMETERS} \ + --parameter technique=$technique_parameter + done + done + done + done + done +} + +function fplot() { + local serie + bench_run | while read seconds total plugin k m workload iteration size erasures rest ; do + if [ -z $seconds ] ; then + echo null, + elif [ $seconds = serie ] ; then + if [ "$serie" ] ; then + echo '];' + fi + local serie=`echo $total | sed 's/cauchy_\([0-9]\)/cauchy_good_\1/g'` + echo "var $serie = [" + else + local x + if [ $workload = encode ] ; then + x=$k/$m + else + x=$k/$m/$erasures + fi + echo "[ '$x', " $(echo "( $total / 1024 / 1024 ) / $seconds" | bc -ql) " ], " + fi + done + echo '];' +} + +function main() { + bench_header + bench_run +} + +if [ "$1" = fplot ] ; then + "$@" +else + main +fi +# Local Variables: +# compile-command: "\ +# CEPH_ERASURE_CODE_BENCHMARK=../../../src/ceph_erasure_code_benchmark \ +# PLUGIN_DIRECTORY=../../../build/lib \ +# ./bench.sh +# " +# End: diff --git a/qa/workunits/erasure-code/encode-decode-non-regression.sh b/qa/workunits/erasure-code/encode-decode-non-regression.sh new file mode 100755 index 000000000..7f36c91c7 --- /dev/null +++ b/qa/workunits/erasure-code/encode-decode-non-regression.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free 
Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +set -ex + +: ${CORPUS:=https://github.com/ceph/ceph-erasure-code-corpus.git} +: ${DIRECTORY:=$CEPH_ROOT/ceph-erasure-code-corpus} + +# when running from sources, the current directory must have precedence +export PATH=:$PATH + +if ! test -d $DIRECTORY ; then + git clone $CORPUS $DIRECTORY +fi + +my_version=v$(ceph --version | cut -f3 -d ' ') + +all_versions=$((ls -d $DIRECTORY/v* ; echo $DIRECTORY/$my_version ) | sort) + +for version in $all_versions ; do + if test -d $version ; then + $version/non-regression.sh + fi + if test $version = $DIRECTORY/$my_version ; then + break + fi +done diff --git a/qa/workunits/erasure-code/examples.css b/qa/workunits/erasure-code/examples.css new file mode 100644 index 000000000..ee4724778 --- /dev/null +++ b/qa/workunits/erasure-code/examples.css @@ -0,0 +1,97 @@ +* { padding: 0; margin: 0; vertical-align: top; } + +body { + background: url(background.png) repeat-x; + font: 18px/1.5em "proxima-nova", Helvetica, Arial, sans-serif; +} + +a { color: #069; } +a:hover { color: #28b; } + +h2 { + margin-top: 15px; + font: normal 32px "omnes-pro", Helvetica, Arial, sans-serif; +} + +h3 { + margin-left: 30px; + font: normal 26px "omnes-pro", Helvetica, Arial, sans-serif; + color: #666; +} + +p { + margin-top: 10px; +} + +button { + font-size: 18px; + padding: 1px 7px; +} + +input { + font-size: 18px; +} + +input[type=checkbox] { + margin: 7px; +} + +#header { + position: relative; + width: 900px; + margin: auto; +} + +#header h2 { + margin-left: 10px; + vertical-align: middle; + font-size: 42px; + font-weight: bold; + text-decoration: none; + color: #000; +} + +#content { + width: 880px; + margin: 0 auto; + padding: 10px; +} + +#footer { + margin-top: 25px; + margin-bottom: 10px; + text-align: center; + font-size: 12px; + color: #999; +} + +.demo-container { + box-sizing: border-box; + width: 850px; + height: 450px; + padding: 20px 15px 15px 15px; + margin: 15px auto 30px auto; + border: 1px solid #ddd; + background: #fff; + background: linear-gradient(#f6f6f6 0, #fff 50px); + background: -o-linear-gradient(#f6f6f6 0, #fff 50px); + background: -ms-linear-gradient(#f6f6f6 0, #fff 50px); + background: -moz-linear-gradient(#f6f6f6 0, #fff 50px); + background: -webkit-linear-gradient(#f6f6f6 0, #fff 50px); + box-shadow: 0 3px 10px rgba(0,0,0,0.15); + -o-box-shadow: 0 3px 10px rgba(0,0,0,0.1); + -ms-box-shadow: 0 3px 10px rgba(0,0,0,0.1); + -moz-box-shadow: 0 3px 10px rgba(0,0,0,0.1); + -webkit-box-shadow: 0 3px 10px rgba(0,0,0,0.1); +} + +.demo-placeholder { + width: 100%; + height: 100%; + font-size: 14px; + line-height: 1.2em; +} + +.legend table { + border-spacing: 5px; +}
\ No newline at end of file diff --git a/qa/workunits/erasure-code/jquery.flot.categories.js b/qa/workunits/erasure-code/jquery.flot.categories.js new file mode 100644 index 000000000..2f9b25797 --- /dev/null +++ b/qa/workunits/erasure-code/jquery.flot.categories.js @@ -0,0 +1,190 @@ +/* Flot plugin for plotting textual data or categories. + +Copyright (c) 2007-2014 IOLA and Ole Laursen. +Licensed under the MIT license. + +Consider a dataset like [["February", 34], ["March", 20], ...]. This plugin +allows you to plot such a dataset directly. + +To enable it, you must specify mode: "categories" on the axis with the textual +labels, e.g. + + $.plot("#placeholder", data, { xaxis: { mode: "categories" } }); + +By default, the labels are ordered as they are met in the data series. If you +need a different ordering, you can specify "categories" on the axis options +and list the categories there: + + xaxis: { + mode: "categories", + categories: ["February", "March", "April"] + } + +If you need to customize the distances between the categories, you can specify +"categories" as an object mapping labels to values + + xaxis: { + mode: "categories", + categories: { "February": 1, "March": 3, "April": 4 } + } + +If you don't specify all categories, the remaining categories will be numbered +from the max value plus 1 (with a spacing of 1 between each). + +Internally, the plugin works by transforming the input data through an auto- +generated mapping where the first category becomes 0, the second 1, etc. +Hence, a point like ["February", 34] becomes [0, 34] internally in Flot (this +is visible in hover and click events that return numbers rather than the +category labels). The plugin also overrides the tick generator to spit out the +categories as ticks instead of the values. + +If you need to map a value back to its label, the mapping is always accessible +as "categories" on the axis object, e.g. plot.getAxes().xaxis.categories. 
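+
+As an illustrative sketch (an editorial addition, not part of the upstream
+plugin documentation), that mapping can be inverted in user code, for
+example to turn the numeric x value reported by a "plothover" event back
+into its category label:
+
+    function labelForValue(plot, value) {
+        var categories = plot.getAxes().xaxis.categories;
+        for (var label in categories)
+            if (categories[label] == value)
+                return label;
+        return null;
+    }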
+ +*/ + +(function ($) { + var options = { + xaxis: { + categories: null + }, + yaxis: { + categories: null + } + }; + + function processRawData(plot, series, data, datapoints) { + // if categories are enabled, we need to disable + // auto-transformation to numbers so the strings are intact + // for later processing + + var xCategories = series.xaxis.options.mode == "categories", + yCategories = series.yaxis.options.mode == "categories"; + + if (!(xCategories || yCategories)) + return; + + var format = datapoints.format; + + if (!format) { + // FIXME: auto-detection should really not be defined here + var s = series; + format = []; + format.push({ x: true, number: true, required: true }); + format.push({ y: true, number: true, required: true }); + + if (s.bars.show || (s.lines.show && s.lines.fill)) { + var autoscale = !!((s.bars.show && s.bars.zero) || (s.lines.show && s.lines.zero)); + format.push({ y: true, number: true, required: false, defaultValue: 0, autoscale: autoscale }); + if (s.bars.horizontal) { + delete format[format.length - 1].y; + format[format.length - 1].x = true; + } + } + + datapoints.format = format; + } + + for (var m = 0; m < format.length; ++m) { + if (format[m].x && xCategories) + format[m].number = false; + + if (format[m].y && yCategories) + format[m].number = false; + } + } + + function getNextIndex(categories) { + var index = -1; + + for (var v in categories) + if (categories[v] > index) + index = categories[v]; + + return index + 1; + } + + function categoriesTickGenerator(axis) { + var res = []; + for (var label in axis.categories) { + var v = axis.categories[label]; + if (v >= axis.min && v <= axis.max) + res.push([v, label]); + } + + res.sort(function (a, b) { return a[0] - b[0]; }); + + return res; + } + + function setupCategoriesForAxis(series, axis, datapoints) { + if (series[axis].options.mode != "categories") + return; + + if (!series[axis].categories) { + // parse options + var c = {}, o = series[axis].options.categories || {}; + if ($.isArray(o)) { + for (var i = 0; i < o.length; ++i) + c[o[i]] = i; + } + else { + for (var v in o) + c[v] = o[v]; + } + + series[axis].categories = c; + } + + // fix ticks + if (!series[axis].options.ticks) + series[axis].options.ticks = categoriesTickGenerator; + + transformPointsOnAxis(datapoints, axis, series[axis].categories); + } + + function transformPointsOnAxis(datapoints, axis, categories) { + // go through the points, transforming them + var points = datapoints.points, + ps = datapoints.pointsize, + format = datapoints.format, + formatColumn = axis.charAt(0), + index = getNextIndex(categories); + + for (var i = 0; i < points.length; i += ps) { + if (points[i] == null) + continue; + + for (var m = 0; m < ps; ++m) { + var val = points[i + m]; + + if (val == null || !format[m][formatColumn]) + continue; + + if (!(val in categories)) { + categories[val] = index; + ++index; + } + + points[i + m] = categories[val]; + } + } + } + + function processDatapoints(plot, series, datapoints) { + setupCategoriesForAxis(series, "xaxis", datapoints); + setupCategoriesForAxis(series, "yaxis", datapoints); + } + + function init(plot) { + plot.hooks.processRawData.push(processRawData); + plot.hooks.processDatapoints.push(processDatapoints); + } + + $.plot.plugins.push({ + init: init, + options: options, + name: 'categories', + version: '1.0' + }); +})(jQuery); diff --git a/qa/workunits/erasure-code/jquery.flot.js b/qa/workunits/erasure-code/jquery.flot.js new file mode 100644 index 000000000..39f3e4cf3 --- /dev/null +++ 
b/qa/workunits/erasure-code/jquery.flot.js @@ -0,0 +1,3168 @@ +/* Javascript plotting library for jQuery, version 0.8.3. + +Copyright (c) 2007-2014 IOLA and Ole Laursen. +Licensed under the MIT license. + +*/ + +// first an inline dependency, jquery.colorhelpers.js, we inline it here +// for convenience + +/* Plugin for jQuery for working with colors. + * + * Version 1.1. + * + * Inspiration from jQuery color animation plugin by John Resig. + * + * Released under the MIT license by Ole Laursen, October 2009. + * + * Examples: + * + * $.color.parse("#fff").scale('rgb', 0.25).add('a', -0.5).toString() + * var c = $.color.extract($("#mydiv"), 'background-color'); + * console.log(c.r, c.g, c.b, c.a); + * $.color.make(100, 50, 25, 0.4).toString() // returns "rgba(100,50,25,0.4)" + * + * Note that .scale() and .add() return the same modified object + * instead of making a new one. + * + * V. 1.1: Fix error handling so e.g. parsing an empty string does + * produce a color rather than just crashing. + */ +(function($){$.color={};$.color.make=function(r,g,b,a){var o={};o.r=r||0;o.g=g||0;o.b=b||0;o.a=a!=null?a:1;o.add=function(c,d){for(var i=0;i<c.length;++i)o[c.charAt(i)]+=d;return o.normalize()};o.scale=function(c,f){for(var i=0;i<c.length;++i)o[c.charAt(i)]*=f;return o.normalize()};o.toString=function(){if(o.a>=1){return"rgb("+[o.r,o.g,o.b].join(",")+")"}else{return"rgba("+[o.r,o.g,o.b,o.a].join(",")+")"}};o.normalize=function(){function clamp(min,value,max){return value<min?min:value>max?max:value}o.r=clamp(0,parseInt(o.r),255);o.g=clamp(0,parseInt(o.g),255);o.b=clamp(0,parseInt(o.b),255);o.a=clamp(0,o.a,1);return o};o.clone=function(){return $.color.make(o.r,o.b,o.g,o.a)};return o.normalize()};$.color.extract=function(elem,css){var c;do{c=elem.css(css).toLowerCase();if(c!=""&&c!="transparent")break;elem=elem.parent()}while(elem.length&&!$.nodeName(elem.get(0),"body"));if(c=="rgba(0, 0, 0, 0)")c="transparent";return $.color.parse(c)};$.color.parse=function(str){var res,m=$.color.make;if(res=/rgb\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*\)/.exec(str))return m(parseInt(res[1],10),parseInt(res[2],10),parseInt(res[3],10));if(res=/rgba\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]+(?:\.[0-9]+)?)\s*\)/.exec(str))return m(parseInt(res[1],10),parseInt(res[2],10),parseInt(res[3],10),parseFloat(res[4]));if(res=/rgb\(\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*\)/.exec(str))return m(parseFloat(res[1])*2.55,parseFloat(res[2])*2.55,parseFloat(res[3])*2.55);if(res=/rgba\(\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\s*\)/.exec(str))return m(parseFloat(res[1])*2.55,parseFloat(res[2])*2.55,parseFloat(res[3])*2.55,parseFloat(res[4]));if(res=/#([a-fA-F0-9]{2})([a-fA-F0-9]{2})([a-fA-F0-9]{2})/.exec(str))return m(parseInt(res[1],16),parseInt(res[2],16),parseInt(res[3],16));if(res=/#([a-fA-F0-9])([a-fA-F0-9])([a-fA-F0-9])/.exec(str))return m(parseInt(res[1]+res[1],16),parseInt(res[2]+res[2],16),parseInt(res[3]+res[3],16));var name=$.trim(str).toLowerCase();if(name=="transparent")return m(255,255,255,0);else{res=lookupColors[name]||[0,0,0];return m(res[0],res[1],res[2])}};var 
lookupColors={aqua:[0,255,255],azure:[240,255,255],beige:[245,245,220],black:[0,0,0],blue:[0,0,255],brown:[165,42,42],cyan:[0,255,255],darkblue:[0,0,139],darkcyan:[0,139,139],darkgrey:[169,169,169],darkgreen:[0,100,0],darkkhaki:[189,183,107],darkmagenta:[139,0,139],darkolivegreen:[85,107,47],darkorange:[255,140,0],darkorchid:[153,50,204],darkred:[139,0,0],darksalmon:[233,150,122],darkviolet:[148,0,211],fuchsia:[255,0,255],gold:[255,215,0],green:[0,128,0],indigo:[75,0,130],khaki:[240,230,140],lightblue:[173,216,230],lightcyan:[224,255,255],lightgreen:[144,238,144],lightgrey:[211,211,211],lightpink:[255,182,193],lightyellow:[255,255,224],lime:[0,255,0],magenta:[255,0,255],maroon:[128,0,0],navy:[0,0,128],olive:[128,128,0],orange:[255,165,0],pink:[255,192,203],purple:[128,0,128],violet:[128,0,128],red:[255,0,0],silver:[192,192,192],white:[255,255,255],yellow:[255,255,0]}})(jQuery); + +// the actual Flot code +(function($) { + + // Cache the prototype hasOwnProperty for faster access + + var hasOwnProperty = Object.prototype.hasOwnProperty; + + // A shim to provide 'detach' to jQuery versions prior to 1.4. Using a DOM + // operation produces the same effect as detach, i.e. removing the element + // without touching its jQuery data. + + // Do not merge this into Flot 0.9, since it requires jQuery 1.4.4+. + + if (!$.fn.detach) { + $.fn.detach = function() { + return this.each(function() { + if (this.parentNode) { + this.parentNode.removeChild( this ); + } + }); + }; + } + + /////////////////////////////////////////////////////////////////////////// + // The Canvas object is a wrapper around an HTML5 <canvas> tag. + // + // @constructor + // @param {string} cls List of classes to apply to the canvas. + // @param {element} container Element onto which to append the canvas. + // + // Requiring a container is a little iffy, but unfortunately canvas + // operations don't work unless the canvas is attached to the DOM. + + function Canvas(cls, container) { + + var element = container.children("." + cls)[0]; + + if (element == null) { + + element = document.createElement("canvas"); + element.className = cls; + + $(element).css({ direction: "ltr", position: "absolute", left: 0, top: 0 }) + .appendTo(container); + + // If HTML5 Canvas isn't available, fall back to [Ex|Flash]canvas + + if (!element.getContext) { + if (window.G_vmlCanvasManager) { + element = window.G_vmlCanvasManager.initElement(element); + } else { + throw new Error("Canvas is not available. If you're using IE with a fall-back such as Excanvas, then there's either a mistake in your conditional include, or the page has no DOCTYPE and is rendering in Quirks Mode."); + } + } + } + + this.element = element; + + var context = this.context = element.getContext("2d"); + + // Determine the screen's ratio of physical to device-independent + // pixels. This is the ratio between the canvas width that the browser + // advertises and the number of pixels actually present in that space. + + // The iPhone 4, for example, has a device-independent width of 320px, + // but its screen is actually 640px wide. It therefore has a pixel + // ratio of 2, while most normal devices have a ratio of 1. 
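+        // (Editorial note: concretely, a devicePixelRatio of 2 over a
+        // backingStoreRatio of 1 gives pixelRatio 2 below, so resize()
+        // backs a 300x200 CSS-pixel canvas with a 600x400 pixel buffer
+        // while its CSS size stays 300x200.)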
+ + var devicePixelRatio = window.devicePixelRatio || 1, + backingStoreRatio = + context.webkitBackingStorePixelRatio || + context.mozBackingStorePixelRatio || + context.msBackingStorePixelRatio || + context.oBackingStorePixelRatio || + context.backingStorePixelRatio || 1; + + this.pixelRatio = devicePixelRatio / backingStoreRatio; + + // Size the canvas to match the internal dimensions of its container + + this.resize(container.width(), container.height()); + + // Collection of HTML div layers for text overlaid onto the canvas + + this.textContainer = null; + this.text = {}; + + // Cache of text fragments and metrics, so we can avoid expensively + // re-calculating them when the plot is re-rendered in a loop. + + this._textCache = {}; + } + + // Resizes the canvas to the given dimensions. + // + // @param {number} width New width of the canvas, in pixels. + // @param {number} width New height of the canvas, in pixels. + + Canvas.prototype.resize = function(width, height) { + + if (width <= 0 || height <= 0) { + throw new Error("Invalid dimensions for plot, width = " + width + ", height = " + height); + } + + var element = this.element, + context = this.context, + pixelRatio = this.pixelRatio; + + // Resize the canvas, increasing its density based on the display's + // pixel ratio; basically giving it more pixels without increasing the + // size of its element, to take advantage of the fact that retina + // displays have that many more pixels in the same advertised space. + + // Resizing should reset the state (excanvas seems to be buggy though) + + if (this.width != width) { + element.width = width * pixelRatio; + element.style.width = width + "px"; + this.width = width; + } + + if (this.height != height) { + element.height = height * pixelRatio; + element.style.height = height + "px"; + this.height = height; + } + + // Save the context, so we can reset in case we get replotted. The + // restore ensure that we're really back at the initial state, and + // should be safe even if we haven't saved the initial state yet. + + context.restore(); + context.save(); + + // Scale the coordinate space to match the display density; so even though we + // may have twice as many pixels, we still want lines and other drawing to + // appear at the same size; the extra pixels will just make them crisper. + + context.scale(pixelRatio, pixelRatio); + }; + + // Clears the entire canvas area, not including any overlaid HTML text + + Canvas.prototype.clear = function() { + this.context.clearRect(0, 0, this.width, this.height); + }; + + // Finishes rendering the canvas, including managing the text overlay. + + Canvas.prototype.render = function() { + + var cache = this._textCache; + + // For each text layer, add elements marked as active that haven't + // already been rendered, and remove those that are no longer active. 
+ + for (var layerKey in cache) { + if (hasOwnProperty.call(cache, layerKey)) { + + var layer = this.getTextLayer(layerKey), + layerCache = cache[layerKey]; + + layer.hide(); + + for (var styleKey in layerCache) { + if (hasOwnProperty.call(layerCache, styleKey)) { + var styleCache = layerCache[styleKey]; + for (var key in styleCache) { + if (hasOwnProperty.call(styleCache, key)) { + + var positions = styleCache[key].positions; + + for (var i = 0, position; position = positions[i]; i++) { + if (position.active) { + if (!position.rendered) { + layer.append(position.element); + position.rendered = true; + } + } else { + positions.splice(i--, 1); + if (position.rendered) { + position.element.detach(); + } + } + } + + if (positions.length == 0) { + delete styleCache[key]; + } + } + } + } + } + + layer.show(); + } + } + }; + + // Creates (if necessary) and returns the text overlay container. + // + // @param {string} classes String of space-separated CSS classes used to + // uniquely identify the text layer. + // @return {object} The jQuery-wrapped text-layer div. + + Canvas.prototype.getTextLayer = function(classes) { + + var layer = this.text[classes]; + + // Create the text layer if it doesn't exist + + if (layer == null) { + + // Create the text layer container, if it doesn't exist + + if (this.textContainer == null) { + this.textContainer = $("<div class='flot-text'></div>") + .css({ + position: "absolute", + top: 0, + left: 0, + bottom: 0, + right: 0, + 'font-size': "smaller", + color: "#545454" + }) + .insertAfter(this.element); + } + + layer = this.text[classes] = $("<div></div>") + .addClass(classes) + .css({ + position: "absolute", + top: 0, + left: 0, + bottom: 0, + right: 0 + }) + .appendTo(this.textContainer); + } + + return layer; + }; + + // Creates (if necessary) and returns a text info object. + // + // The object looks like this: + // + // { + // width: Width of the text's wrapper div. + // height: Height of the text's wrapper div. + // element: The jQuery-wrapped HTML div containing the text. + // positions: Array of positions at which this text is drawn. + // } + // + // The positions array contains objects that look like this: + // + // { + // active: Flag indicating whether the text should be visible. + // rendered: Flag indicating whether the text is currently visible. + // element: The jQuery-wrapped HTML div containing the text. + // x: X coordinate at which to draw the text. + // y: Y coordinate at which to draw the text. + // } + // + // Each position after the first receives a clone of the original element. + // + // The idea is that that the width, height, and general 'identity' of the + // text is constant no matter where it is placed; the placements are a + // secondary property. + // + // Canvas maintains a cache of recently-used text info objects; getTextInfo + // either returns the cached element or creates a new entry. + // + // @param {string} layer A string of space-separated CSS classes uniquely + // identifying the layer containing this text. + // @param {string} text Text string to retrieve info for. + // @param {(string|object)=} font Either a string of space-separated CSS + // classes or a font-spec object, defining the text's font and style. + // @param {number=} angle Angle at which to rotate the text, in degrees. + // Angle is currently unused, it will be implemented in the future. + // @param {number=} width Maximum width of the text before it wraps. + // @return {object} a text info object. 
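+    // (Editorial note: the cache consulted below is nested as
+    // this._textCache[layer][style][text], where "style" is either the
+    // font-class string or the CSS font definition generated from a
+    // font-spec object.)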
+ + Canvas.prototype.getTextInfo = function(layer, text, font, angle, width) { + + var textStyle, layerCache, styleCache, info; + + // Cast the value to a string, in case we were given a number or such + + text = "" + text; + + // If the font is a font-spec object, generate a CSS font definition + + if (typeof font === "object") { + textStyle = font.style + " " + font.variant + " " + font.weight + " " + font.size + "px/" + font.lineHeight + "px " + font.family; + } else { + textStyle = font; + } + + // Retrieve (or create) the cache for the text's layer and styles + + layerCache = this._textCache[layer]; + + if (layerCache == null) { + layerCache = this._textCache[layer] = {}; + } + + styleCache = layerCache[textStyle]; + + if (styleCache == null) { + styleCache = layerCache[textStyle] = {}; + } + + info = styleCache[text]; + + // If we can't find a matching element in our cache, create a new one + + if (info == null) { + + var element = $("<div></div>").html(text) + .css({ + position: "absolute", + 'max-width': width, + top: -9999 + }) + .appendTo(this.getTextLayer(layer)); + + if (typeof font === "object") { + element.css({ + font: textStyle, + color: font.color + }); + } else if (typeof font === "string") { + element.addClass(font); + } + + info = styleCache[text] = { + width: element.outerWidth(true), + height: element.outerHeight(true), + element: element, + positions: [] + }; + + element.detach(); + } + + return info; + }; + + // Adds a text string to the canvas text overlay. + // + // The text isn't drawn immediately; it is marked as rendering, which will + // result in its addition to the canvas on the next render pass. + // + // @param {string} layer A string of space-separated CSS classes uniquely + // identifying the layer containing this text. + // @param {number} x X coordinate at which to draw the text. + // @param {number} y Y coordinate at which to draw the text. + // @param {string} text Text string to draw. + // @param {(string|object)=} font Either a string of space-separated CSS + // classes or a font-spec object, defining the text's font and style. + // @param {number=} angle Angle at which to rotate the text, in degrees. + // Angle is currently unused, it will be implemented in the future. + // @param {number=} width Maximum width of the text before it wraps. + // @param {string=} halign Horizontal alignment of the text; either "left", + // "center" or "right". + // @param {string=} valign Vertical alignment of the text; either "top", + // "middle" or "bottom". + + Canvas.prototype.addText = function(layer, x, y, text, font, angle, width, halign, valign) { + + var info = this.getTextInfo(layer, text, font, angle, width), + positions = info.positions; + + // Tweak the div's position to match the text's alignment + + if (halign == "center") { + x -= info.width / 2; + } else if (halign == "right") { + x -= info.width; + } + + if (valign == "middle") { + y -= info.height / 2; + } else if (valign == "bottom") { + y -= info.height; + } + + // Determine whether this text already exists at this position. + // If so, mark it for inclusion in the next render pass. + + for (var i = 0, position; position = positions[i]; i++) { + if (position.x == x && position.y == y) { + position.active = true; + return; + } + } + + // If the text doesn't exist at this position, create a new entry + + // For the very first position we'll re-use the original element, + // while for subsequent ones we'll clone it. + + position = { + active: true, + rendered: false, + element: positions.length ? 
info.element.clone() : info.element, + x: x, + y: y + }; + + positions.push(position); + + // Move the element to its final position within the container + + position.element.css({ + top: Math.round(y), + left: Math.round(x), + 'text-align': halign // In case the text wraps + }); + }; + + // Removes one or more text strings from the canvas text overlay. + // + // If no parameters are given, all text within the layer is removed. + // + // Note that the text is not immediately removed; it is simply marked as + // inactive, which will result in its removal on the next render pass. + // This avoids the performance penalty for 'clear and redraw' behavior, + // where we potentially get rid of all text on a layer, but will likely + // add back most or all of it later, as when redrawing axes, for example. + // + // @param {string} layer A string of space-separated CSS classes uniquely + // identifying the layer containing this text. + // @param {number=} x X coordinate of the text. + // @param {number=} y Y coordinate of the text. + // @param {string=} text Text string to remove. + // @param {(string|object)=} font Either a string of space-separated CSS + // classes or a font-spec object, defining the text's font and style. + // @param {number=} angle Angle at which the text is rotated, in degrees. + // Angle is currently unused, it will be implemented in the future. + + Canvas.prototype.removeText = function(layer, x, y, text, font, angle) { + if (text == null) { + var layerCache = this._textCache[layer]; + if (layerCache != null) { + for (var styleKey in layerCache) { + if (hasOwnProperty.call(layerCache, styleKey)) { + var styleCache = layerCache[styleKey]; + for (var key in styleCache) { + if (hasOwnProperty.call(styleCache, key)) { + var positions = styleCache[key].positions; + for (var i = 0, position; position = positions[i]; i++) { + position.active = false; + } + } + } + } + } + } + } else { + var positions = this.getTextInfo(layer, text, font, angle).positions; + for (var i = 0, position; position = positions[i]; i++) { + if (position.x == x && position.y == y) { + position.active = false; + } + } + } + }; + + /////////////////////////////////////////////////////////////////////////// + // The top-level container for the entire plot. + + function Plot(placeholder, data_, options_, plugins) { + // data is on the form: + // [ series1, series2 ... ] + // where series is either just the data as [ [x1, y1], [x2, y2], ... ] + // or { data: [ [x1, y1], [x2, y2], ... ], label: "some label", ... 
} + + var series = [], + options = { + // the color theme used for graphs + colors: ["#edc240", "#afd8f8", "#cb4b4b", "#4da74d", "#9440ed"], + legend: { + show: true, + noColumns: 1, // number of colums in legend table + labelFormatter: null, // fn: string -> string + labelBoxBorderColor: "#ccc", // border color for the little label boxes + container: null, // container (as jQuery object) to put legend in, null means default on top of graph + position: "ne", // position of default legend container within plot + margin: 5, // distance from grid edge to default legend container within plot + backgroundColor: null, // null means auto-detect + backgroundOpacity: 0.85, // set to 0 to avoid background + sorted: null // default to no legend sorting + }, + xaxis: { + show: null, // null = auto-detect, true = always, false = never + position: "bottom", // or "top" + mode: null, // null or "time" + font: null, // null (derived from CSS in placeholder) or object like { size: 11, lineHeight: 13, style: "italic", weight: "bold", family: "sans-serif", variant: "small-caps" } + color: null, // base color, labels, ticks + tickColor: null, // possibly different color of ticks, e.g. "rgba(0,0,0,0.15)" + transform: null, // null or f: number -> number to transform axis + inverseTransform: null, // if transform is set, this should be the inverse function + min: null, // min. value to show, null means set automatically + max: null, // max. value to show, null means set automatically + autoscaleMargin: null, // margin in % to add if auto-setting min/max + ticks: null, // either [1, 3] or [[1, "a"], 3] or (fn: axis info -> ticks) or app. number of ticks for auto-ticks + tickFormatter: null, // fn: number -> string + labelWidth: null, // size of tick labels in pixels + labelHeight: null, + reserveSpace: null, // whether to reserve space even if axis isn't shown + tickLength: null, // size in pixels of ticks, or "full" for whole line + alignTicksWithAxis: null, // axis number or null for no sync + tickDecimals: null, // no. of decimals, null means auto + tickSize: null, // number or [number, "unit"] + minTickSize: null // number or [number, "unit"] + }, + yaxis: { + autoscaleMargin: 0.02, + position: "left" // or "right" + }, + xaxes: [], + yaxes: [], + series: { + points: { + show: false, + radius: 3, + lineWidth: 2, // in pixels + fill: true, + fillColor: "#ffffff", + symbol: "circle" // or callback + }, + lines: { + // we don't put in show: false so we can see + // whether lines were actively disabled + lineWidth: 2, // in pixels + fill: false, + fillColor: null, + steps: false + // Omit 'zero', so we can later default its value to + // match that of the 'fill' option. + }, + bars: { + show: false, + lineWidth: 2, // in pixels + barWidth: 1, // in units of the x axis + fill: true, + fillColor: null, + align: "left", // "left", "right", or "center" + horizontal: false, + zero: true + }, + shadowSize: 3, + highlightColor: null + }, + grid: { + show: true, + aboveData: false, + color: "#545454", // primary color used for outline and labels + backgroundColor: null, // null for transparent, else color + borderColor: null, // set if different from the grid color + tickColor: null, // color for the ticks, e.g. 
"rgba(0,0,0,0.15)" + margin: 0, // distance from the canvas edge to the grid + labelMargin: 5, // in pixels + axisMargin: 8, // in pixels + borderWidth: 2, // in pixels + minBorderMargin: null, // in pixels, null means taken from points radius + markings: null, // array of ranges or fn: axes -> array of ranges + markingsColor: "#f4f4f4", + markingsLineWidth: 2, + // interactive stuff + clickable: false, + hoverable: false, + autoHighlight: true, // highlight in case mouse is near + mouseActiveRadius: 10 // how far the mouse can be away to activate an item + }, + interaction: { + redrawOverlayInterval: 1000/60 // time between updates, -1 means in same flow + }, + hooks: {} + }, + surface = null, // the canvas for the plot itself + overlay = null, // canvas for interactive stuff on top of plot + eventHolder = null, // jQuery object that events should be bound to + ctx = null, octx = null, + xaxes = [], yaxes = [], + plotOffset = { left: 0, right: 0, top: 0, bottom: 0}, + plotWidth = 0, plotHeight = 0, + hooks = { + processOptions: [], + processRawData: [], + processDatapoints: [], + processOffset: [], + drawBackground: [], + drawSeries: [], + draw: [], + bindEvents: [], + drawOverlay: [], + shutdown: [] + }, + plot = this; + + // public functions + plot.setData = setData; + plot.setupGrid = setupGrid; + plot.draw = draw; + plot.getPlaceholder = function() { return placeholder; }; + plot.getCanvas = function() { return surface.element; }; + plot.getPlotOffset = function() { return plotOffset; }; + plot.width = function () { return plotWidth; }; + plot.height = function () { return plotHeight; }; + plot.offset = function () { + var o = eventHolder.offset(); + o.left += plotOffset.left; + o.top += plotOffset.top; + return o; + }; + plot.getData = function () { return series; }; + plot.getAxes = function () { + var res = {}, i; + $.each(xaxes.concat(yaxes), function (_, axis) { + if (axis) + res[axis.direction + (axis.n != 1 ? 
axis.n : "") + "axis"] = axis; + }); + return res; + }; + plot.getXAxes = function () { return xaxes; }; + plot.getYAxes = function () { return yaxes; }; + plot.c2p = canvasToAxisCoords; + plot.p2c = axisToCanvasCoords; + plot.getOptions = function () { return options; }; + plot.highlight = highlight; + plot.unhighlight = unhighlight; + plot.triggerRedrawOverlay = triggerRedrawOverlay; + plot.pointOffset = function(point) { + return { + left: parseInt(xaxes[axisNumber(point, "x") - 1].p2c(+point.x) + plotOffset.left, 10), + top: parseInt(yaxes[axisNumber(point, "y") - 1].p2c(+point.y) + plotOffset.top, 10) + }; + }; + plot.shutdown = shutdown; + plot.destroy = function () { + shutdown(); + placeholder.removeData("plot").empty(); + + series = []; + options = null; + surface = null; + overlay = null; + eventHolder = null; + ctx = null; + octx = null; + xaxes = []; + yaxes = []; + hooks = null; + highlights = []; + plot = null; + }; + plot.resize = function () { + var width = placeholder.width(), + height = placeholder.height(); + surface.resize(width, height); + overlay.resize(width, height); + }; + + // public attributes + plot.hooks = hooks; + + // initialize + initPlugins(plot); + parseOptions(options_); + setupCanvases(); + setData(data_); + setupGrid(); + draw(); + bindEvents(); + + + function executeHooks(hook, args) { + args = [plot].concat(args); + for (var i = 0; i < hook.length; ++i) + hook[i].apply(this, args); + } + + function initPlugins() { + + // References to key classes, allowing plugins to modify them + + var classes = { + Canvas: Canvas + }; + + for (var i = 0; i < plugins.length; ++i) { + var p = plugins[i]; + p.init(plot, classes); + if (p.options) + $.extend(true, options, p.options); + } + } + + function parseOptions(opts) { + + $.extend(true, options, opts); + + // $.extend merges arrays, rather than replacing them. When less + // colors are provided than the size of the default palette, we + // end up with those colors plus the remaining defaults, which is + // not expected behavior; avoid it by replacing them here. + + if (opts && opts.colors) { + options.colors = opts.colors; + } + + if (options.xaxis.color == null) + options.xaxis.color = $.color.parse(options.grid.color).scale('a', 0.22).toString(); + if (options.yaxis.color == null) + options.yaxis.color = $.color.parse(options.grid.color).scale('a', 0.22).toString(); + + if (options.xaxis.tickColor == null) // grid.tickColor for back-compatibility + options.xaxis.tickColor = options.grid.tickColor || options.xaxis.color; + if (options.yaxis.tickColor == null) // grid.tickColor for back-compatibility + options.yaxis.tickColor = options.grid.tickColor || options.yaxis.color; + + if (options.grid.borderColor == null) + options.grid.borderColor = options.grid.color; + if (options.grid.tickColor == null) + options.grid.tickColor = $.color.parse(options.grid.color).scale('a', 0.22).toString(); + + // Fill in defaults for axis options, including any unspecified + // font-spec fields, if a font-spec was provided. + + // If no x/y axis options were provided, create one of each anyway, + // since the rest of the code assumes that they exist. + + var i, axisOptions, axisCount, + fontSize = placeholder.css("font-size"), + fontSizeDefault = fontSize ? 
+fontSize.replace("px", "") : 13, + fontDefaults = { + style: placeholder.css("font-style"), + size: Math.round(0.8 * fontSizeDefault), + variant: placeholder.css("font-variant"), + weight: placeholder.css("font-weight"), + family: placeholder.css("font-family") + }; + + axisCount = options.xaxes.length || 1; + for (i = 0; i < axisCount; ++i) { + + axisOptions = options.xaxes[i]; + if (axisOptions && !axisOptions.tickColor) { + axisOptions.tickColor = axisOptions.color; + } + + axisOptions = $.extend(true, {}, options.xaxis, axisOptions); + options.xaxes[i] = axisOptions; + + if (axisOptions.font) { + axisOptions.font = $.extend({}, fontDefaults, axisOptions.font); + if (!axisOptions.font.color) { + axisOptions.font.color = axisOptions.color; + } + if (!axisOptions.font.lineHeight) { + axisOptions.font.lineHeight = Math.round(axisOptions.font.size * 1.15); + } + } + } + + axisCount = options.yaxes.length || 1; + for (i = 0; i < axisCount; ++i) { + + axisOptions = options.yaxes[i]; + if (axisOptions && !axisOptions.tickColor) { + axisOptions.tickColor = axisOptions.color; + } + + axisOptions = $.extend(true, {}, options.yaxis, axisOptions); + options.yaxes[i] = axisOptions; + + if (axisOptions.font) { + axisOptions.font = $.extend({}, fontDefaults, axisOptions.font); + if (!axisOptions.font.color) { + axisOptions.font.color = axisOptions.color; + } + if (!axisOptions.font.lineHeight) { + axisOptions.font.lineHeight = Math.round(axisOptions.font.size * 1.15); + } + } + } + + // backwards compatibility, to be removed in future + if (options.xaxis.noTicks && options.xaxis.ticks == null) + options.xaxis.ticks = options.xaxis.noTicks; + if (options.yaxis.noTicks && options.yaxis.ticks == null) + options.yaxis.ticks = options.yaxis.noTicks; + if (options.x2axis) { + options.xaxes[1] = $.extend(true, {}, options.xaxis, options.x2axis); + options.xaxes[1].position = "top"; + // Override the inherit to allow the axis to auto-scale + if (options.x2axis.min == null) { + options.xaxes[1].min = null; + } + if (options.x2axis.max == null) { + options.xaxes[1].max = null; + } + } + if (options.y2axis) { + options.yaxes[1] = $.extend(true, {}, options.yaxis, options.y2axis); + options.yaxes[1].position = "right"; + // Override the inherit to allow the axis to auto-scale + if (options.y2axis.min == null) { + options.yaxes[1].min = null; + } + if (options.y2axis.max == null) { + options.yaxes[1].max = null; + } + } + if (options.grid.coloredAreas) + options.grid.markings = options.grid.coloredAreas; + if (options.grid.coloredAreasColor) + options.grid.markingsColor = options.grid.coloredAreasColor; + if (options.lines) + $.extend(true, options.series.lines, options.lines); + if (options.points) + $.extend(true, options.series.points, options.points); + if (options.bars) + $.extend(true, options.series.bars, options.bars); + if (options.shadowSize != null) + options.series.shadowSize = options.shadowSize; + if (options.highlightColor != null) + options.series.highlightColor = options.highlightColor; + + // save options on axes for future reference + for (i = 0; i < options.xaxes.length; ++i) + getOrCreateAxis(xaxes, i + 1).options = options.xaxes[i]; + for (i = 0; i < options.yaxes.length; ++i) + getOrCreateAxis(yaxes, i + 1).options = options.yaxes[i]; + + // add hooks from options + for (var n in hooks) + if (options.hooks[n] && options.hooks[n].length) + hooks[n] = hooks[n].concat(options.hooks[n]); + + executeHooks(hooks.processOptions, [options]); + } + + function setData(d) { + series = parseData(d); + 
fillInSeriesOptions(); + processData(); + } + + function parseData(d) { + var res = []; + for (var i = 0; i < d.length; ++i) { + var s = $.extend(true, {}, options.series); + + if (d[i].data != null) { + s.data = d[i].data; // move the data instead of deep-copy + delete d[i].data; + + $.extend(true, s, d[i]); + + d[i].data = s.data; + } + else + s.data = d[i]; + res.push(s); + } + + return res; + } + + function axisNumber(obj, coord) { + var a = obj[coord + "axis"]; + if (typeof a == "object") // if we got a real axis, extract number + a = a.n; + if (typeof a != "number") + a = 1; // default to first axis + return a; + } + + function allAxes() { + // return flat array without annoying null entries + return $.grep(xaxes.concat(yaxes), function (a) { return a; }); + } + + function canvasToAxisCoords(pos) { + // return an object with x/y corresponding to all used axes + var res = {}, i, axis; + for (i = 0; i < xaxes.length; ++i) { + axis = xaxes[i]; + if (axis && axis.used) + res["x" + axis.n] = axis.c2p(pos.left); + } + + for (i = 0; i < yaxes.length; ++i) { + axis = yaxes[i]; + if (axis && axis.used) + res["y" + axis.n] = axis.c2p(pos.top); + } + + if (res.x1 !== undefined) + res.x = res.x1; + if (res.y1 !== undefined) + res.y = res.y1; + + return res; + } + + function axisToCanvasCoords(pos) { + // get canvas coords from the first pair of x/y found in pos + var res = {}, i, axis, key; + + for (i = 0; i < xaxes.length; ++i) { + axis = xaxes[i]; + if (axis && axis.used) { + key = "x" + axis.n; + if (pos[key] == null && axis.n == 1) + key = "x"; + + if (pos[key] != null) { + res.left = axis.p2c(pos[key]); + break; + } + } + } + + for (i = 0; i < yaxes.length; ++i) { + axis = yaxes[i]; + if (axis && axis.used) { + key = "y" + axis.n; + if (pos[key] == null && axis.n == 1) + key = "y"; + + if (pos[key] != null) { + res.top = axis.p2c(pos[key]); + break; + } + } + } + + return res; + } + + function getOrCreateAxis(axes, number) { + if (!axes[number - 1]) + axes[number - 1] = { + n: number, // save the number for future reference + direction: axes == xaxes ? "x" : "y", + options: $.extend(true, {}, axes == xaxes ? options.xaxis : options.yaxis) + }; + + return axes[number - 1]; + } + + function fillInSeriesOptions() { + + var neededColors = series.length, maxIndex = -1, i; + + // Subtract the number of series that already have fixed colors or + // color indexes from the number that we still need to generate. + + for (i = 0; i < series.length; ++i) { + var sc = series[i].color; + if (sc != null) { + neededColors--; + if (typeof sc == "number" && sc > maxIndex) { + maxIndex = sc; + } + } + } + + // If any of the series have fixed color indexes, then we need to + // generate at least as many colors as the highest index. + + if (neededColors <= maxIndex) { + neededColors = maxIndex + 1; + } + + // Generate all the colors, using first the option colors and then + // variations on those colors once they're exhausted. + + var c, colors = [], colorPool = options.colors, + colorPoolSize = colorPool.length, variation = 0; + + for (i = 0; i < neededColors; i++) { + + c = $.color.parse(colorPool[i % colorPoolSize] || "#666"); + + // Each time we exhaust the colors in the pool we adjust + // a scaling factor used to produce more variations on + // those colors. The factor alternates negative/positive + // to produce lighter/darker colors. + + // Reset the variation after every few cycles, or else + // it will end up producing only white or black colors. 
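+            // (Editorial note: with the default 5-color palette and no
+            // per-series colors, this yields the base colors for series 1-5,
+            // darker variants (scale 0.8) for series 6-10, lighter ones
+            // (scale 1.2) for 11-15, and so on, alternating as described
+            // above.)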
+ + if (i % colorPoolSize == 0 && i) { + if (variation >= 0) { + if (variation < 0.5) { + variation = -variation - 0.2; + } else variation = 0; + } else variation = -variation; + } + + colors[i] = c.scale('rgb', 1 + variation); + } + + // Finalize the series options, filling in their colors + + var colori = 0, s; + for (i = 0; i < series.length; ++i) { + s = series[i]; + + // assign colors + if (s.color == null) { + s.color = colors[colori].toString(); + ++colori; + } + else if (typeof s.color == "number") + s.color = colors[s.color].toString(); + + // turn on lines automatically in case nothing is set + if (s.lines.show == null) { + var v, show = true; + for (v in s) + if (s[v] && s[v].show) { + show = false; + break; + } + if (show) + s.lines.show = true; + } + + // If nothing was provided for lines.zero, default it to match + // lines.fill, since areas by default should extend to zero. + + if (s.lines.zero == null) { + s.lines.zero = !!s.lines.fill; + } + + // setup axes + s.xaxis = getOrCreateAxis(xaxes, axisNumber(s, "x")); + s.yaxis = getOrCreateAxis(yaxes, axisNumber(s, "y")); + } + } + + function processData() { + var topSentry = Number.POSITIVE_INFINITY, + bottomSentry = Number.NEGATIVE_INFINITY, + fakeInfinity = Number.MAX_VALUE, + i, j, k, m, length, + s, points, ps, x, y, axis, val, f, p, + data, format; + + function updateAxis(axis, min, max) { + if (min < axis.datamin && min != -fakeInfinity) + axis.datamin = min; + if (max > axis.datamax && max != fakeInfinity) + axis.datamax = max; + } + + $.each(allAxes(), function (_, axis) { + // init axis + axis.datamin = topSentry; + axis.datamax = bottomSentry; + axis.used = false; + }); + + for (i = 0; i < series.length; ++i) { + s = series[i]; + s.datapoints = { points: [] }; + + executeHooks(hooks.processRawData, [ s, s.data, s.datapoints ]); + } + + // first pass: clean and copy data + for (i = 0; i < series.length; ++i) { + s = series[i]; + + data = s.data; + format = s.datapoints.format; + + if (!format) { + format = []; + // find out how to copy + format.push({ x: true, number: true, required: true }); + format.push({ y: true, number: true, required: true }); + + if (s.bars.show || (s.lines.show && s.lines.fill)) { + var autoscale = !!((s.bars.show && s.bars.zero) || (s.lines.show && s.lines.zero)); + format.push({ y: true, number: true, required: false, defaultValue: 0, autoscale: autoscale }); + if (s.bars.horizontal) { + delete format[format.length - 1].y; + format[format.length - 1].x = true; + } + } + + s.datapoints.format = format; + } + + if (s.datapoints.pointsize != null) + continue; // already filled in + + s.datapoints.pointsize = format.length; + + ps = s.datapoints.pointsize; + points = s.datapoints.points; + + var insertSteps = s.lines.show && s.lines.steps; + s.xaxis.used = s.yaxis.used = true; + + for (j = k = 0; j < data.length; ++j, k += ps) { + p = data[j]; + + var nullify = p == null; + if (!nullify) { + for (m = 0; m < ps; ++m) { + val = p[m]; + f = format[m]; + + if (f) { + if (f.number && val != null) { + val = +val; // convert to number + if (isNaN(val)) + val = null; + else if (val == Infinity) + val = fakeInfinity; + else if (val == -Infinity) + val = -fakeInfinity; + } + + if (val == null) { + if (f.required) + nullify = true; + + if (f.defaultValue != null) + val = f.defaultValue; + } + } + + points[k + m] = val; + } + } + + if (nullify) { + for (m = 0; m < ps; ++m) { + val = points[k + m]; + if (val != null) { + f = format[m]; + // extract min/max info + if (f.autoscale !== false) { + if (f.x) { + 
updateAxis(s.xaxis, val, val); + } + if (f.y) { + updateAxis(s.yaxis, val, val); + } + } + } + points[k + m] = null; + } + } + else { + // a little bit of line specific stuff that + // perhaps shouldn't be here, but lacking + // better means... + if (insertSteps && k > 0 + && points[k - ps] != null + && points[k - ps] != points[k] + && points[k - ps + 1] != points[k + 1]) { + // copy the point to make room for a middle point + for (m = 0; m < ps; ++m) + points[k + ps + m] = points[k + m]; + + // middle point has same y + points[k + 1] = points[k - ps + 1]; + + // we've added a point, better reflect that + k += ps; + } + } + } + } + + // give the hooks a chance to run + for (i = 0; i < series.length; ++i) { + s = series[i]; + + executeHooks(hooks.processDatapoints, [ s, s.datapoints]); + } + + // second pass: find datamax/datamin for auto-scaling + for (i = 0; i < series.length; ++i) { + s = series[i]; + points = s.datapoints.points; + ps = s.datapoints.pointsize; + format = s.datapoints.format; + + var xmin = topSentry, ymin = topSentry, + xmax = bottomSentry, ymax = bottomSentry; + + for (j = 0; j < points.length; j += ps) { + if (points[j] == null) + continue; + + for (m = 0; m < ps; ++m) { + val = points[j + m]; + f = format[m]; + if (!f || f.autoscale === false || val == fakeInfinity || val == -fakeInfinity) + continue; + + if (f.x) { + if (val < xmin) + xmin = val; + if (val > xmax) + xmax = val; + } + if (f.y) { + if (val < ymin) + ymin = val; + if (val > ymax) + ymax = val; + } + } + } + + if (s.bars.show) { + // make sure we got room for the bar on the dancing floor + var delta; + + switch (s.bars.align) { + case "left": + delta = 0; + break; + case "right": + delta = -s.bars.barWidth; + break; + default: + delta = -s.bars.barWidth / 2; + } + + if (s.bars.horizontal) { + ymin += delta; + ymax += delta + s.bars.barWidth; + } + else { + xmin += delta; + xmax += delta + s.bars.barWidth; + } + } + + updateAxis(s.xaxis, xmin, xmax); + updateAxis(s.yaxis, ymin, ymax); + } + + $.each(allAxes(), function (_, axis) { + if (axis.datamin == topSentry) + axis.datamin = null; + if (axis.datamax == bottomSentry) + axis.datamax = null; + }); + } + + function setupCanvases() { + + // Make sure the placeholder is clear of everything except canvases + // from a previous plot in this container that we'll try to re-use. + + placeholder.css("padding", 0) // padding messes up the positioning + .children().filter(function(){ + return !$(this).hasClass("flot-overlay") && !$(this).hasClass('flot-base'); + }).remove(); + + if (placeholder.css("position") == 'static') + placeholder.css("position", "relative"); // for positioning labels and overlay + + surface = new Canvas("flot-base", placeholder); + overlay = new Canvas("flot-overlay", placeholder); // overlay canvas for interactive features + + ctx = surface.context; + octx = overlay.context; + + // define which element we're listening for events on + eventHolder = $(overlay.element).unbind(); + + // If we're re-using a plot object, shut down the old one + + var existing = placeholder.data("plot"); + + if (existing) { + existing.shutdown(); + overlay.clear(); + } + + // save in case we get replotted + placeholder.data("plot", plot); + } + + function bindEvents() { + // bind events + if (options.grid.hoverable) { + eventHolder.mousemove(onMouseMove); + + // Use bind, rather than .mouseleave, because we officially + // still support jQuery 1.2.6, which doesn't define a shortcut + // for mouseenter or mouseleave. 
This was a bug/oversight that + // was fixed somewhere around 1.3.x. We can return to using + // .mouseleave when we drop support for 1.2.6. + + eventHolder.bind("mouseleave", onMouseLeave); + } + + if (options.grid.clickable) + eventHolder.click(onClick); + + executeHooks(hooks.bindEvents, [eventHolder]); + } + + function shutdown() { + if (redrawTimeout) + clearTimeout(redrawTimeout); + + eventHolder.unbind("mousemove", onMouseMove); + eventHolder.unbind("mouseleave", onMouseLeave); + eventHolder.unbind("click", onClick); + + executeHooks(hooks.shutdown, [eventHolder]); + } + + function setTransformationHelpers(axis) { + // set helper functions on the axis, assumes plot area + // has been computed already + + function identity(x) { return x; } + + var s, m, t = axis.options.transform || identity, + it = axis.options.inverseTransform; + + // precompute how much the axis is scaling a point + // in canvas space + if (axis.direction == "x") { + s = axis.scale = plotWidth / Math.abs(t(axis.max) - t(axis.min)); + m = Math.min(t(axis.max), t(axis.min)); + } + else { + s = axis.scale = plotHeight / Math.abs(t(axis.max) - t(axis.min)); + s = -s; + m = Math.max(t(axis.max), t(axis.min)); + } + + // data point to canvas coordinate + if (t == identity) // slight optimization + axis.p2c = function (p) { return (p - m) * s; }; + else + axis.p2c = function (p) { return (t(p) - m) * s; }; + // canvas coordinate to data point + if (!it) + axis.c2p = function (c) { return m + c / s; }; + else + axis.c2p = function (c) { return it(m + c / s); }; + } + + function measureTickLabels(axis) { + + var opts = axis.options, + ticks = axis.ticks || [], + labelWidth = opts.labelWidth || 0, + labelHeight = opts.labelHeight || 0, + maxWidth = labelWidth || (axis.direction == "x" ? Math.floor(surface.width / (ticks.length || 1)) : null), + legacyStyles = axis.direction + "Axis " + axis.direction + axis.n + "Axis", + layer = "flot-" + axis.direction + "-axis flot-" + axis.direction + axis.n + "-axis " + legacyStyles, + font = opts.font || "flot-tick-label tickLabel"; + + for (var i = 0; i < ticks.length; ++i) { + + var t = ticks[i]; + + if (!t.label) + continue; + + var info = surface.getTextInfo(layer, t.label, font, null, maxWidth); + + labelWidth = Math.max(labelWidth, info.width); + labelHeight = Math.max(labelHeight, info.height); + } + + axis.labelWidth = opts.labelWidth || labelWidth; + axis.labelHeight = opts.labelHeight || labelHeight; + } + + function allocateAxisBoxFirstPhase(axis) { + // find the bounding box of the axis by looking at label + // widths/heights and ticks, make room by diminishing the + // plotOffset; this first phase only looks at one + // dimension per axis, the other dimension depends on the + // other axes so will have to wait + + var lw = axis.labelWidth, + lh = axis.labelHeight, + pos = axis.options.position, + isXAxis = axis.direction === "x", + tickLength = axis.options.tickLength, + axisMargin = options.grid.axisMargin, + padding = options.grid.labelMargin, + innermost = true, + outermost = true, + first = true, + found = false; + + // Determine the axis's position in its direction and on its side + + $.each(isXAxis ? 
xaxes : yaxes, function(i, a) { + if (a && (a.show || a.reserveSpace)) { + if (a === axis) { + found = true; + } else if (a.options.position === pos) { + if (found) { + outermost = false; + } else { + innermost = false; + } + } + if (!found) { + first = false; + } + } + }); + + // The outermost axis on each side has no margin + + if (outermost) { + axisMargin = 0; + } + + // The ticks for the first axis in each direction stretch across + + if (tickLength == null) { + tickLength = first ? "full" : 5; + } + + if (!isNaN(+tickLength)) + padding += +tickLength; + + if (isXAxis) { + lh += padding; + + if (pos == "bottom") { + plotOffset.bottom += lh + axisMargin; + axis.box = { top: surface.height - plotOffset.bottom, height: lh }; + } + else { + axis.box = { top: plotOffset.top + axisMargin, height: lh }; + plotOffset.top += lh + axisMargin; + } + } + else { + lw += padding; + + if (pos == "left") { + axis.box = { left: plotOffset.left + axisMargin, width: lw }; + plotOffset.left += lw + axisMargin; + } + else { + plotOffset.right += lw + axisMargin; + axis.box = { left: surface.width - plotOffset.right, width: lw }; + } + } + + // save for future reference + axis.position = pos; + axis.tickLength = tickLength; + axis.box.padding = padding; + axis.innermost = innermost; + } + + function allocateAxisBoxSecondPhase(axis) { + // now that all axis boxes have been placed in one + // dimension, we can set the remaining dimension coordinates + if (axis.direction == "x") { + axis.box.left = plotOffset.left - axis.labelWidth / 2; + axis.box.width = surface.width - plotOffset.left - plotOffset.right + axis.labelWidth; + } + else { + axis.box.top = plotOffset.top - axis.labelHeight / 2; + axis.box.height = surface.height - plotOffset.bottom - plotOffset.top + axis.labelHeight; + } + } + + function adjustLayoutForThingsStickingOut() { + // possibly adjust plot offset to ensure everything stays + // inside the canvas and isn't clipped off + + var minMargin = options.grid.minBorderMargin, + axis, i; + + // check stuff from the plot (FIXME: this should just read + // a value from the series, otherwise it's impossible to + // customize) + if (minMargin == null) { + minMargin = 0; + for (i = 0; i < series.length; ++i) + minMargin = Math.max(minMargin, 2 * (series[i].points.radius + series[i].points.lineWidth/2)); + } + + var margins = { + left: minMargin, + right: minMargin, + top: minMargin, + bottom: minMargin + }; + + // check axis labels, note we don't check the actual + // labels but instead use the overall width/height to not + // jump as much around with replots + $.each(allAxes(), function (_, axis) { + if (axis.reserveSpace && axis.ticks && axis.ticks.length) { + if (axis.direction === "x") { + margins.left = Math.max(margins.left, axis.labelWidth / 2); + margins.right = Math.max(margins.right, axis.labelWidth / 2); + } else { + margins.bottom = Math.max(margins.bottom, axis.labelHeight / 2); + margins.top = Math.max(margins.top, axis.labelHeight / 2); + } + } + }); + + plotOffset.left = Math.ceil(Math.max(margins.left, plotOffset.left)); + plotOffset.right = Math.ceil(Math.max(margins.right, plotOffset.right)); + plotOffset.top = Math.ceil(Math.max(margins.top, plotOffset.top)); + plotOffset.bottom = Math.ceil(Math.max(margins.bottom, plotOffset.bottom)); + } + + function setupGrid() { + var i, axes = allAxes(), showGrid = options.grid.show; + + // Initialize the plot's offset from the edge of the canvas + + for (var a in plotOffset) { + var margin = options.grid.margin || 0; + plotOffset[a] = typeof 
margin == "number" ? margin : margin[a] || 0; + } + + executeHooks(hooks.processOffset, [plotOffset]); + + // If the grid is visible, add its border width to the offset + + for (var a in plotOffset) { + if(typeof(options.grid.borderWidth) == "object") { + plotOffset[a] += showGrid ? options.grid.borderWidth[a] : 0; + } + else { + plotOffset[a] += showGrid ? options.grid.borderWidth : 0; + } + } + + $.each(axes, function (_, axis) { + var axisOpts = axis.options; + axis.show = axisOpts.show == null ? axis.used : axisOpts.show; + axis.reserveSpace = axisOpts.reserveSpace == null ? axis.show : axisOpts.reserveSpace; + setRange(axis); + }); + + if (showGrid) { + + var allocatedAxes = $.grep(axes, function (axis) { + return axis.show || axis.reserveSpace; + }); + + $.each(allocatedAxes, function (_, axis) { + // make the ticks + setupTickGeneration(axis); + setTicks(axis); + snapRangeToTicks(axis, axis.ticks); + // find labelWidth/Height for axis + measureTickLabels(axis); + }); + + // with all dimensions calculated, we can compute the + // axis bounding boxes, start from the outside + // (reverse order) + for (i = allocatedAxes.length - 1; i >= 0; --i) + allocateAxisBoxFirstPhase(allocatedAxes[i]); + + // make sure we've got enough space for things that + // might stick out + adjustLayoutForThingsStickingOut(); + + $.each(allocatedAxes, function (_, axis) { + allocateAxisBoxSecondPhase(axis); + }); + } + + plotWidth = surface.width - plotOffset.left - plotOffset.right; + plotHeight = surface.height - plotOffset.bottom - plotOffset.top; + + // now we got the proper plot dimensions, we can compute the scaling + $.each(axes, function (_, axis) { + setTransformationHelpers(axis); + }); + + if (showGrid) { + drawAxisLabels(); + } + + insertLegend(); + } + + function setRange(axis) { + var opts = axis.options, + min = +(opts.min != null ? opts.min : axis.datamin), + max = +(opts.max != null ? opts.max : axis.datamax), + delta = max - min; + + if (delta == 0.0) { + // degenerate case + var widen = max == 0 ? 1 : 0.01; + + if (opts.min == null) + min -= widen; + // always widen max if we couldn't widen min to ensure we + // don't fall into min == max which doesn't work + if (opts.max == null || opts.min != null) + max += widen; + } + else { + // consider autoscaling + var margin = opts.autoscaleMargin; + if (margin != null) { + if (opts.min == null) { + min -= delta * margin; + // make sure we don't go below zero if all values + // are positive + if (min < 0 && axis.datamin != null && axis.datamin >= 0) + min = 0; + } + if (opts.max == null) { + max += delta * margin; + if (max > 0 && axis.datamax != null && axis.datamax <= 0) + max = 0; + } + } + } + axis.min = min; + axis.max = max; + } + + function setupTickGeneration(axis) { + var opts = axis.options; + + // estimate number of ticks + var noTicks; + if (typeof opts.ticks == "number" && opts.ticks > 0) + noTicks = opts.ticks; + else + // heuristic based on the model a*sqrt(x) fitted to + // some data points that seemed reasonable + noTicks = 0.3 * Math.sqrt(axis.direction == "x" ? 
surface.width : surface.height); + + var delta = (axis.max - axis.min) / noTicks, + dec = -Math.floor(Math.log(delta) / Math.LN10), + maxDec = opts.tickDecimals; + + if (maxDec != null && dec > maxDec) { + dec = maxDec; + } + + var magn = Math.pow(10, -dec), + norm = delta / magn, // norm is between 1.0 and 10.0 + size; + + if (norm < 1.5) { + size = 1; + } else if (norm < 3) { + size = 2; + // special case for 2.5, requires an extra decimal + if (norm > 2.25 && (maxDec == null || dec + 1 <= maxDec)) { + size = 2.5; + ++dec; + } + } else if (norm < 7.5) { + size = 5; + } else { + size = 10; + } + + size *= magn; + + if (opts.minTickSize != null && size < opts.minTickSize) { + size = opts.minTickSize; + } + + axis.delta = delta; + axis.tickDecimals = Math.max(0, maxDec != null ? maxDec : dec); + axis.tickSize = opts.tickSize || size; + + // Time mode was moved to a plug-in in 0.8, and since so many people use it + // we'll add an especially friendly reminder to make sure they included it. + + if (opts.mode == "time" && !axis.tickGenerator) { + throw new Error("Time mode requires the flot.time plugin."); + } + + // Flot supports base-10 axes; any other mode else is handled by a plug-in, + // like flot.time.js. + + if (!axis.tickGenerator) { + + axis.tickGenerator = function (axis) { + + var ticks = [], + start = floorInBase(axis.min, axis.tickSize), + i = 0, + v = Number.NaN, + prev; + + do { + prev = v; + v = start + i * axis.tickSize; + ticks.push(v); + ++i; + } while (v < axis.max && v != prev); + return ticks; + }; + + axis.tickFormatter = function (value, axis) { + + var factor = axis.tickDecimals ? Math.pow(10, axis.tickDecimals) : 1; + var formatted = "" + Math.round(value * factor) / factor; + + // If tickDecimals was specified, ensure that we have exactly that + // much precision; otherwise default to the value's own precision. + + if (axis.tickDecimals != null) { + var decimal = formatted.indexOf("."); + var precision = decimal == -1 ? 0 : formatted.length - decimal - 1; + if (precision < axis.tickDecimals) { + return (precision ? formatted : formatted + ".") + ("" + factor).substr(1, axis.tickDecimals - precision); + } + } + + return formatted; + }; + } + + if ($.isFunction(opts.tickFormatter)) + axis.tickFormatter = function (v, axis) { return "" + opts.tickFormatter(v, axis); }; + + if (opts.alignTicksWithAxis != null) { + var otherAxis = (axis.direction == "x" ? 
xaxes : yaxes)[opts.alignTicksWithAxis - 1]; + if (otherAxis && otherAxis.used && otherAxis != axis) { + // consider snapping min/max to outermost nice ticks + var niceTicks = axis.tickGenerator(axis); + if (niceTicks.length > 0) { + if (opts.min == null) + axis.min = Math.min(axis.min, niceTicks[0]); + if (opts.max == null && niceTicks.length > 1) + axis.max = Math.max(axis.max, niceTicks[niceTicks.length - 1]); + } + + axis.tickGenerator = function (axis) { + // copy ticks, scaled to this axis + var ticks = [], v, i; + for (i = 0; i < otherAxis.ticks.length; ++i) { + v = (otherAxis.ticks[i].v - otherAxis.min) / (otherAxis.max - otherAxis.min); + v = axis.min + v * (axis.max - axis.min); + ticks.push(v); + } + return ticks; + }; + + // we might need an extra decimal since forced + // ticks don't necessarily fit naturally + if (!axis.mode && opts.tickDecimals == null) { + var extraDec = Math.max(0, -Math.floor(Math.log(axis.delta) / Math.LN10) + 1), + ts = axis.tickGenerator(axis); + + // only proceed if the tick interval rounded + // with an extra decimal doesn't give us a + // zero at end + if (!(ts.length > 1 && /\..*0$/.test((ts[1] - ts[0]).toFixed(extraDec)))) + axis.tickDecimals = extraDec; + } + } + } + } + + function setTicks(axis) { + var oticks = axis.options.ticks, ticks = []; + if (oticks == null || (typeof oticks == "number" && oticks > 0)) + ticks = axis.tickGenerator(axis); + else if (oticks) { + if ($.isFunction(oticks)) + // generate the ticks + ticks = oticks(axis); + else + ticks = oticks; + } + + // clean up/labelify the supplied ticks, copy them over + var i, v; + axis.ticks = []; + for (i = 0; i < ticks.length; ++i) { + var label = null; + var t = ticks[i]; + if (typeof t == "object") { + v = +t[0]; + if (t.length > 1) + label = t[1]; + } + else + v = +t; + if (label == null) + label = axis.tickFormatter(v, axis); + if (!isNaN(v)) + axis.ticks.push({ v: v, label: label }); + } + } + + function snapRangeToTicks(axis, ticks) { + if (axis.options.autoscaleMargin && ticks.length > 0) { + // snap to ticks + if (axis.options.min == null) + axis.min = Math.min(axis.min, ticks[0].v); + if (axis.options.max == null && ticks.length > 1) + axis.max = Math.max(axis.max, ticks[ticks.length - 1].v); + } + } + + function draw() { + + surface.clear(); + + executeHooks(hooks.drawBackground, [ctx]); + + var grid = options.grid; + + // draw background, if any + if (grid.show && grid.backgroundColor) + drawBackground(); + + if (grid.show && !grid.aboveData) { + drawGrid(); + } + + for (var i = 0; i < series.length; ++i) { + executeHooks(hooks.drawSeries, [ctx, series[i]]); + drawSeries(series[i]); + } + + executeHooks(hooks.draw, [ctx]); + + if (grid.show && grid.aboveData) { + drawGrid(); + } + + surface.render(); + + // A draw implies that either the axes or data have changed, so we + // should probably update the overlay highlights as well. + + triggerRedrawOverlay(); + } + + function extractRange(ranges, coord) { + var axis, from, to, key, axes = allAxes(); + + for (var i = 0; i < axes.length; ++i) { + axis = axes[i]; + if (axis.direction == coord) { + key = coord + axis.n + "axis"; + if (!ranges[key] && axis.n == 1) + key = coord + "axis"; // support x1axis as xaxis + if (ranges[key]) { + from = ranges[key].from; + to = ranges[key].to; + break; + } + } + } + + // backwards-compat stuff - to be removed in future + if (!ranges[key]) { + axis = coord == "x" ? 
xaxes[0] : yaxes[0]; + from = ranges[coord + "1"]; + to = ranges[coord + "2"]; + } + + // auto-reverse as an added bonus + if (from != null && to != null && from > to) { + var tmp = from; + from = to; + to = tmp; + } + + return { from: from, to: to, axis: axis }; + } + + function drawBackground() { + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + ctx.fillStyle = getColorOrGradient(options.grid.backgroundColor, plotHeight, 0, "rgba(255, 255, 255, 0)"); + ctx.fillRect(0, 0, plotWidth, plotHeight); + ctx.restore(); + } + + function drawGrid() { + var i, axes, bw, bc; + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + // draw markings + var markings = options.grid.markings; + if (markings) { + if ($.isFunction(markings)) { + axes = plot.getAxes(); + // xmin etc. is backwards compatibility, to be + // removed in the future + axes.xmin = axes.xaxis.min; + axes.xmax = axes.xaxis.max; + axes.ymin = axes.yaxis.min; + axes.ymax = axes.yaxis.max; + + markings = markings(axes); + } + + for (i = 0; i < markings.length; ++i) { + var m = markings[i], + xrange = extractRange(m, "x"), + yrange = extractRange(m, "y"); + + // fill in missing + if (xrange.from == null) + xrange.from = xrange.axis.min; + if (xrange.to == null) + xrange.to = xrange.axis.max; + if (yrange.from == null) + yrange.from = yrange.axis.min; + if (yrange.to == null) + yrange.to = yrange.axis.max; + + // clip + if (xrange.to < xrange.axis.min || xrange.from > xrange.axis.max || + yrange.to < yrange.axis.min || yrange.from > yrange.axis.max) + continue; + + xrange.from = Math.max(xrange.from, xrange.axis.min); + xrange.to = Math.min(xrange.to, xrange.axis.max); + yrange.from = Math.max(yrange.from, yrange.axis.min); + yrange.to = Math.min(yrange.to, yrange.axis.max); + + var xequal = xrange.from === xrange.to, + yequal = yrange.from === yrange.to; + + if (xequal && yequal) { + continue; + } + + // then draw + xrange.from = Math.floor(xrange.axis.p2c(xrange.from)); + xrange.to = Math.floor(xrange.axis.p2c(xrange.to)); + yrange.from = Math.floor(yrange.axis.p2c(yrange.from)); + yrange.to = Math.floor(yrange.axis.p2c(yrange.to)); + + if (xequal || yequal) { + var lineWidth = m.lineWidth || options.grid.markingsLineWidth, + subPixel = lineWidth % 2 ? 0.5 : 0; + ctx.beginPath(); + ctx.strokeStyle = m.color || options.grid.markingsColor; + ctx.lineWidth = lineWidth; + if (xequal) { + ctx.moveTo(xrange.to + subPixel, yrange.from); + ctx.lineTo(xrange.to + subPixel, yrange.to); + } else { + ctx.moveTo(xrange.from, yrange.to + subPixel); + ctx.lineTo(xrange.to, yrange.to + subPixel); + } + ctx.stroke(); + } else { + ctx.fillStyle = m.color || options.grid.markingsColor; + ctx.fillRect(xrange.from, yrange.to, + xrange.to - xrange.from, + yrange.from - yrange.to); + } + } + } + + // draw the ticks + axes = allAxes(); + bw = options.grid.borderWidth; + + for (var j = 0; j < axes.length; ++j) { + var axis = axes[j], box = axis.box, + t = axis.tickLength, x, y, xoff, yoff; + if (!axis.show || axis.ticks.length == 0) + continue; + + ctx.lineWidth = 1; + + // find the edges + if (axis.direction == "x") { + x = 0; + if (t == "full") + y = (axis.position == "top" ? 0 : plotHeight); + else + y = box.top - plotOffset.top + (axis.position == "top" ? box.height : 0); + } + else { + y = 0; + if (t == "full") + x = (axis.position == "left" ? 0 : plotWidth); + else + x = box.left - plotOffset.left + (axis.position == "left" ? 
box.width : 0); + } + + // draw tick bar + if (!axis.innermost) { + ctx.strokeStyle = axis.options.color; + ctx.beginPath(); + xoff = yoff = 0; + if (axis.direction == "x") + xoff = plotWidth + 1; + else + yoff = plotHeight + 1; + + if (ctx.lineWidth == 1) { + if (axis.direction == "x") { + y = Math.floor(y) + 0.5; + } else { + x = Math.floor(x) + 0.5; + } + } + + ctx.moveTo(x, y); + ctx.lineTo(x + xoff, y + yoff); + ctx.stroke(); + } + + // draw ticks + + ctx.strokeStyle = axis.options.tickColor; + + ctx.beginPath(); + for (i = 0; i < axis.ticks.length; ++i) { + var v = axis.ticks[i].v; + + xoff = yoff = 0; + + if (isNaN(v) || v < axis.min || v > axis.max + // skip those lying on the axes if we got a border + || (t == "full" + && ((typeof bw == "object" && bw[axis.position] > 0) || bw > 0) + && (v == axis.min || v == axis.max))) + continue; + + if (axis.direction == "x") { + x = axis.p2c(v); + yoff = t == "full" ? -plotHeight : t; + + if (axis.position == "top") + yoff = -yoff; + } + else { + y = axis.p2c(v); + xoff = t == "full" ? -plotWidth : t; + + if (axis.position == "left") + xoff = -xoff; + } + + if (ctx.lineWidth == 1) { + if (axis.direction == "x") + x = Math.floor(x) + 0.5; + else + y = Math.floor(y) + 0.5; + } + + ctx.moveTo(x, y); + ctx.lineTo(x + xoff, y + yoff); + } + + ctx.stroke(); + } + + + // draw border + if (bw) { + // If either borderWidth or borderColor is an object, then draw the border + // line by line instead of as one rectangle + bc = options.grid.borderColor; + if(typeof bw == "object" || typeof bc == "object") { + if (typeof bw !== "object") { + bw = {top: bw, right: bw, bottom: bw, left: bw}; + } + if (typeof bc !== "object") { + bc = {top: bc, right: bc, bottom: bc, left: bc}; + } + + if (bw.top > 0) { + ctx.strokeStyle = bc.top; + ctx.lineWidth = bw.top; + ctx.beginPath(); + ctx.moveTo(0 - bw.left, 0 - bw.top/2); + ctx.lineTo(plotWidth, 0 - bw.top/2); + ctx.stroke(); + } + + if (bw.right > 0) { + ctx.strokeStyle = bc.right; + ctx.lineWidth = bw.right; + ctx.beginPath(); + ctx.moveTo(plotWidth + bw.right / 2, 0 - bw.top); + ctx.lineTo(plotWidth + bw.right / 2, plotHeight); + ctx.stroke(); + } + + if (bw.bottom > 0) { + ctx.strokeStyle = bc.bottom; + ctx.lineWidth = bw.bottom; + ctx.beginPath(); + ctx.moveTo(plotWidth + bw.right, plotHeight + bw.bottom / 2); + ctx.lineTo(0, plotHeight + bw.bottom / 2); + ctx.stroke(); + } + + if (bw.left > 0) { + ctx.strokeStyle = bc.left; + ctx.lineWidth = bw.left; + ctx.beginPath(); + ctx.moveTo(0 - bw.left/2, plotHeight + bw.bottom); + ctx.lineTo(0- bw.left/2, 0); + ctx.stroke(); + } + } + else { + ctx.lineWidth = bw; + ctx.strokeStyle = options.grid.borderColor; + ctx.strokeRect(-bw/2, -bw/2, plotWidth + bw, plotHeight + bw); + } + } + + ctx.restore(); + } + + function drawAxisLabels() { + + $.each(allAxes(), function (_, axis) { + var box = axis.box, + legacyStyles = axis.direction + "Axis " + axis.direction + axis.n + "Axis", + layer = "flot-" + axis.direction + "-axis flot-" + axis.direction + axis.n + "-axis " + legacyStyles, + font = axis.options.font || "flot-tick-label tickLabel", + tick, x, y, halign, valign; + + // Remove text before checking for axis.show and ticks.length; + // otherwise plugins, like flot-tickrotor, that draw their own + // tick labels will end up with both theirs and the defaults. 
+ + surface.removeText(layer); + + if (!axis.show || axis.ticks.length == 0) + return; + + for (var i = 0; i < axis.ticks.length; ++i) { + + tick = axis.ticks[i]; + if (!tick.label || tick.v < axis.min || tick.v > axis.max) + continue; + + if (axis.direction == "x") { + halign = "center"; + x = plotOffset.left + axis.p2c(tick.v); + if (axis.position == "bottom") { + y = box.top + box.padding; + } else { + y = box.top + box.height - box.padding; + valign = "bottom"; + } + } else { + valign = "middle"; + y = plotOffset.top + axis.p2c(tick.v); + if (axis.position == "left") { + x = box.left + box.width - box.padding; + halign = "right"; + } else { + x = box.left + box.padding; + } + } + + surface.addText(layer, x, y, tick.label, font, null, null, halign, valign); + } + }); + } + + function drawSeries(series) { + if (series.lines.show) + drawSeriesLines(series); + if (series.bars.show) + drawSeriesBars(series); + if (series.points.show) + drawSeriesPoints(series); + } + + function drawSeriesLines(series) { + function plotLine(datapoints, xoffset, yoffset, axisx, axisy) { + var points = datapoints.points, + ps = datapoints.pointsize, + prevx = null, prevy = null; + + ctx.beginPath(); + for (var i = ps; i < points.length; i += ps) { + var x1 = points[i - ps], y1 = points[i - ps + 1], + x2 = points[i], y2 = points[i + 1]; + + if (x1 == null || x2 == null) + continue; + + // clip with ymin + if (y1 <= y2 && y1 < axisy.min) { + if (y2 < axisy.min) + continue; // line segment is outside + // compute new intersection point + x1 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.min; + } + else if (y2 <= y1 && y2 < axisy.min) { + if (y1 < axisy.min) + continue; + x2 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.min; + } + + // clip with ymax + if (y1 >= y2 && y1 > axisy.max) { + if (y2 > axisy.max) + continue; + x1 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.max; + } + else if (y2 >= y1 && y2 > axisy.max) { + if (y1 > axisy.max) + continue; + x2 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.max; + } + + // clip with xmin + if (x1 <= x2 && x1 < axisx.min) { + if (x2 < axisx.min) + continue; + y1 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.min; + } + else if (x2 <= x1 && x2 < axisx.min) { + if (x1 < axisx.min) + continue; + y2 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.min; + } + + // clip with xmax + if (x1 >= x2 && x1 > axisx.max) { + if (x2 > axisx.max) + continue; + y1 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.max; + } + else if (x2 >= x1 && x2 > axisx.max) { + if (x1 > axisx.max) + continue; + y2 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.max; + } + + if (x1 != prevx || y1 != prevy) + ctx.moveTo(axisx.p2c(x1) + xoffset, axisy.p2c(y1) + yoffset); + + prevx = x2; + prevy = y2; + ctx.lineTo(axisx.p2c(x2) + xoffset, axisy.p2c(y2) + yoffset); + } + ctx.stroke(); + } + + function plotLineArea(datapoints, axisx, axisy) { + var points = datapoints.points, + ps = datapoints.pointsize, + bottom = Math.min(Math.max(0, axisy.min), axisy.max), + i = 0, top, areaOpen = false, + ypos = 1, segmentStart = 0, segmentEnd = 0; + + // we process each segment in two turns, first forward + // direction to sketch out top, then once we hit the + // end we go backwards to sketch the bottom + while (true) { + if (ps > 0 && i > points.length + ps) + break; + + i += ps; // ps is negative if going backwards + + var x1 = points[i - ps], + y1 = points[i - ps + ypos], + x2 = points[i], y2 = 
points[i + ypos]; + + if (areaOpen) { + if (ps > 0 && x1 != null && x2 == null) { + // at turning point + segmentEnd = i; + ps = -ps; + ypos = 2; + continue; + } + + if (ps < 0 && i == segmentStart + ps) { + // done with the reverse sweep + ctx.fill(); + areaOpen = false; + ps = -ps; + ypos = 1; + i = segmentStart = segmentEnd + ps; + continue; + } + } + + if (x1 == null || x2 == null) + continue; + + // clip x values + + // clip with xmin + if (x1 <= x2 && x1 < axisx.min) { + if (x2 < axisx.min) + continue; + y1 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.min; + } + else if (x2 <= x1 && x2 < axisx.min) { + if (x1 < axisx.min) + continue; + y2 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.min; + } + + // clip with xmax + if (x1 >= x2 && x1 > axisx.max) { + if (x2 > axisx.max) + continue; + y1 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.max; + } + else if (x2 >= x1 && x2 > axisx.max) { + if (x1 > axisx.max) + continue; + y2 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.max; + } + + if (!areaOpen) { + // open area + ctx.beginPath(); + ctx.moveTo(axisx.p2c(x1), axisy.p2c(bottom)); + areaOpen = true; + } + + // now first check the case where both is outside + if (y1 >= axisy.max && y2 >= axisy.max) { + ctx.lineTo(axisx.p2c(x1), axisy.p2c(axisy.max)); + ctx.lineTo(axisx.p2c(x2), axisy.p2c(axisy.max)); + continue; + } + else if (y1 <= axisy.min && y2 <= axisy.min) { + ctx.lineTo(axisx.p2c(x1), axisy.p2c(axisy.min)); + ctx.lineTo(axisx.p2c(x2), axisy.p2c(axisy.min)); + continue; + } + + // else it's a bit more complicated, there might + // be a flat maxed out rectangle first, then a + // triangular cutout or reverse; to find these + // keep track of the current x values + var x1old = x1, x2old = x2; + + // clip the y values, without shortcutting, we + // go through all cases in turn + + // clip with ymin + if (y1 <= y2 && y1 < axisy.min && y2 >= axisy.min) { + x1 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.min; + } + else if (y2 <= y1 && y2 < axisy.min && y1 >= axisy.min) { + x2 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.min; + } + + // clip with ymax + if (y1 >= y2 && y1 > axisy.max && y2 <= axisy.max) { + x1 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.max; + } + else if (y2 >= y1 && y2 > axisy.max && y1 <= axisy.max) { + x2 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.max; + } + + // if the x value was changed we got a rectangle + // to fill + if (x1 != x1old) { + ctx.lineTo(axisx.p2c(x1old), axisy.p2c(y1)); + // it goes to (x1, y1), but we fill that below + } + + // fill triangular section, this sometimes result + // in redundant points if (x1, y1) hasn't changed + // from previous line to, but we just ignore that + ctx.lineTo(axisx.p2c(x1), axisy.p2c(y1)); + ctx.lineTo(axisx.p2c(x2), axisy.p2c(y2)); + + // fill the other rectangle if it's there + if (x2 != x2old) { + ctx.lineTo(axisx.p2c(x2), axisy.p2c(y2)); + ctx.lineTo(axisx.p2c(x2old), axisy.p2c(y2)); + } + } + } + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + ctx.lineJoin = "round"; + + var lw = series.lines.lineWidth, + sw = series.shadowSize; + // FIXME: consider another form of shadow when filling is turned on + if (lw > 0 && sw > 0) { + // draw shadow as a thick and thin line with transparency + ctx.lineWidth = sw; + ctx.strokeStyle = "rgba(0,0,0,0.1)"; + // position shadow at angle from the mid of line + var angle = Math.PI/18; + plotLine(series.datapoints, Math.sin(angle) * 
(lw/2 + sw/2), Math.cos(angle) * (lw/2 + sw/2), series.xaxis, series.yaxis); + ctx.lineWidth = sw/2; + plotLine(series.datapoints, Math.sin(angle) * (lw/2 + sw/4), Math.cos(angle) * (lw/2 + sw/4), series.xaxis, series.yaxis); + } + + ctx.lineWidth = lw; + ctx.strokeStyle = series.color; + var fillStyle = getFillStyle(series.lines, series.color, 0, plotHeight); + if (fillStyle) { + ctx.fillStyle = fillStyle; + plotLineArea(series.datapoints, series.xaxis, series.yaxis); + } + + if (lw > 0) + plotLine(series.datapoints, 0, 0, series.xaxis, series.yaxis); + ctx.restore(); + } + + function drawSeriesPoints(series) { + function plotPoints(datapoints, radius, fillStyle, offset, shadow, axisx, axisy, symbol) { + var points = datapoints.points, ps = datapoints.pointsize; + + for (var i = 0; i < points.length; i += ps) { + var x = points[i], y = points[i + 1]; + if (x == null || x < axisx.min || x > axisx.max || y < axisy.min || y > axisy.max) + continue; + + ctx.beginPath(); + x = axisx.p2c(x); + y = axisy.p2c(y) + offset; + if (symbol == "circle") + ctx.arc(x, y, radius, 0, shadow ? Math.PI : Math.PI * 2, false); + else + symbol(ctx, x, y, radius, shadow); + ctx.closePath(); + + if (fillStyle) { + ctx.fillStyle = fillStyle; + ctx.fill(); + } + ctx.stroke(); + } + } + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + var lw = series.points.lineWidth, + sw = series.shadowSize, + radius = series.points.radius, + symbol = series.points.symbol; + + // If the user sets the line width to 0, we change it to a very + // small value. A line width of 0 seems to force the default of 1. + // Doing the conditional here allows the shadow setting to still be + // optional even with a lineWidth of 0. + + if( lw == 0 ) + lw = 0.0001; + + if (lw > 0 && sw > 0) { + // draw shadow in two steps + var w = sw / 2; + ctx.lineWidth = w; + ctx.strokeStyle = "rgba(0,0,0,0.1)"; + plotPoints(series.datapoints, radius, null, w + w/2, true, + series.xaxis, series.yaxis, symbol); + + ctx.strokeStyle = "rgba(0,0,0,0.2)"; + plotPoints(series.datapoints, radius, null, w/2, true, + series.xaxis, series.yaxis, symbol); + } + + ctx.lineWidth = lw; + ctx.strokeStyle = series.color; + plotPoints(series.datapoints, radius, + getFillStyle(series.points, series.color), 0, false, + series.xaxis, series.yaxis, symbol); + ctx.restore(); + } + + function drawBar(x, y, b, barLeft, barRight, fillStyleCallback, axisx, axisy, c, horizontal, lineWidth) { + var left, right, bottom, top, + drawLeft, drawRight, drawTop, drawBottom, + tmp; + + // in horizontal mode, we start the bar from the left + // instead of from the bottom so it appears to be + // horizontal rather than vertical + if (horizontal) { + drawBottom = drawRight = drawTop = true; + drawLeft = false; + left = b; + right = x; + top = y + barLeft; + bottom = y + barRight; + + // account for negative bars + if (right < left) { + tmp = right; + right = left; + left = tmp; + drawLeft = true; + drawRight = false; + } + } + else { + drawLeft = drawRight = drawTop = true; + drawBottom = false; + left = x + barLeft; + right = x + barRight; + bottom = b; + top = y; + + // account for negative bars + if (top < bottom) { + tmp = top; + top = bottom; + bottom = tmp; + drawBottom = true; + drawTop = false; + } + } + + // clip + if (right < axisx.min || left > axisx.max || + top < axisy.min || bottom > axisy.max) + return; + + if (left < axisx.min) { + left = axisx.min; + drawLeft = false; + } + + if (right > axisx.max) { + right = axisx.max; + drawRight = false; + } + + if (bottom < 
axisy.min) { + bottom = axisy.min; + drawBottom = false; + } + + if (top > axisy.max) { + top = axisy.max; + drawTop = false; + } + + left = axisx.p2c(left); + bottom = axisy.p2c(bottom); + right = axisx.p2c(right); + top = axisy.p2c(top); + + // fill the bar + if (fillStyleCallback) { + c.fillStyle = fillStyleCallback(bottom, top); + c.fillRect(left, top, right - left, bottom - top) + } + + // draw outline + if (lineWidth > 0 && (drawLeft || drawRight || drawTop || drawBottom)) { + c.beginPath(); + + // FIXME: inline moveTo is buggy with excanvas + c.moveTo(left, bottom); + if (drawLeft) + c.lineTo(left, top); + else + c.moveTo(left, top); + if (drawTop) + c.lineTo(right, top); + else + c.moveTo(right, top); + if (drawRight) + c.lineTo(right, bottom); + else + c.moveTo(right, bottom); + if (drawBottom) + c.lineTo(left, bottom); + else + c.moveTo(left, bottom); + c.stroke(); + } + } + + function drawSeriesBars(series) { + function plotBars(datapoints, barLeft, barRight, fillStyleCallback, axisx, axisy) { + var points = datapoints.points, ps = datapoints.pointsize; + + for (var i = 0; i < points.length; i += ps) { + if (points[i] == null) + continue; + drawBar(points[i], points[i + 1], points[i + 2], barLeft, barRight, fillStyleCallback, axisx, axisy, ctx, series.bars.horizontal, series.bars.lineWidth); + } + } + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + // FIXME: figure out a way to add shadows (for instance along the right edge) + ctx.lineWidth = series.bars.lineWidth; + ctx.strokeStyle = series.color; + + var barLeft; + + switch (series.bars.align) { + case "left": + barLeft = 0; + break; + case "right": + barLeft = -series.bars.barWidth; + break; + default: + barLeft = -series.bars.barWidth / 2; + } + + var fillStyleCallback = series.bars.fill ? function (bottom, top) { return getFillStyle(series.bars, series.color, bottom, top); } : null; + plotBars(series.datapoints, barLeft, barLeft + series.bars.barWidth, fillStyleCallback, series.xaxis, series.yaxis); + ctx.restore(); + } + + function getFillStyle(filloptions, seriesColor, bottom, top) { + var fill = filloptions.fill; + if (!fill) + return null; + + if (filloptions.fillColor) + return getColorOrGradient(filloptions.fillColor, bottom, top, seriesColor); + + var c = $.color.parse(seriesColor); + c.a = typeof fill == "number" ? fill : 0.4; + c.normalize(); + return c.toString(); + } + + function insertLegend() { + + if (options.legend.container != null) { + $(options.legend.container).html(""); + } else { + placeholder.find(".legend").remove(); + } + + if (!options.legend.show) { + return; + } + + var fragments = [], entries = [], rowStarted = false, + lf = options.legend.labelFormatter, s, label; + + // Build a list of legend entries, with each having a label and a color + + for (var i = 0; i < series.length; ++i) { + s = series[i]; + if (s.label) { + label = lf ? lf(s.label, s) : s.label; + if (label) { + entries.push({ + label: label, + color: s.color + }); + } + } + } + + // Sort the legend using either the default or a custom comparator + + if (options.legend.sorted) { + if ($.isFunction(options.legend.sorted)) { + entries.sort(options.legend.sorted); + } else if (options.legend.sorted == "reverse") { + entries.reverse(); + } else { + var ascending = options.legend.sorted != "descending"; + entries.sort(function(a, b) { + return a.label == b.label ? 0 : ( + (a.label < b.label) != ascending ? 
1 : -1 // Logical XOR + ); + }); + } + } + + // Generate markup for the list of entries, in their final order + + for (var i = 0; i < entries.length; ++i) { + + var entry = entries[i]; + + if (i % options.legend.noColumns == 0) { + if (rowStarted) + fragments.push('</tr>'); + fragments.push('<tr>'); + rowStarted = true; + } + + fragments.push( + '<td class="legendColorBox"><div style="border:1px solid ' + options.legend.labelBoxBorderColor + ';padding:1px"><div style="width:4px;height:0;border:5px solid ' + entry.color + ';overflow:hidden"></div></div></td>' + + '<td class="legendLabel">' + entry.label + '</td>' + ); + } + + if (rowStarted) + fragments.push('</tr>'); + + if (fragments.length == 0) + return; + + var table = '<table style="font-size:smaller;color:' + options.grid.color + '">' + fragments.join("") + '</table>'; + if (options.legend.container != null) + $(options.legend.container).html(table); + else { + var pos = "", + p = options.legend.position, + m = options.legend.margin; + if (m[0] == null) + m = [m, m]; + if (p.charAt(0) == "n") + pos += 'top:' + (m[1] + plotOffset.top) + 'px;'; + else if (p.charAt(0) == "s") + pos += 'bottom:' + (m[1] + plotOffset.bottom) + 'px;'; + if (p.charAt(1) == "e") + pos += 'right:' + (m[0] + plotOffset.right) + 'px;'; + else if (p.charAt(1) == "w") + pos += 'left:' + (m[0] + plotOffset.left) + 'px;'; + var legend = $('<div class="legend">' + table.replace('style="', 'style="position:absolute;' + pos +';') + '</div>').appendTo(placeholder); + if (options.legend.backgroundOpacity != 0.0) { + // put in the transparent background + // separately to avoid blended labels and + // label boxes + var c = options.legend.backgroundColor; + if (c == null) { + c = options.grid.backgroundColor; + if (c && typeof c == "string") + c = $.color.parse(c); + else + c = $.color.extract(legend, 'background-color'); + c.a = 1; + c = c.toString(); + } + var div = legend.children(); + $('<div style="position:absolute;width:' + div.width() + 'px;height:' + div.height() + 'px;' + pos +'background-color:' + c + ';"> </div>').prependTo(legend).css('opacity', options.legend.backgroundOpacity); + } + } + } + + + // interactive features + + var highlights = [], + redrawTimeout = null; + + // returns the data item the mouse is over, or null if none is found + function findNearbyItem(mouseX, mouseY, seriesFilter) { + var maxDistance = options.grid.mouseActiveRadius, + smallestDistance = maxDistance * maxDistance + 1, + item = null, foundPoint = false, i, j, ps; + + for (i = series.length - 1; i >= 0; --i) { + if (!seriesFilter(series[i])) + continue; + + var s = series[i], + axisx = s.xaxis, + axisy = s.yaxis, + points = s.datapoints.points, + mx = axisx.c2p(mouseX), // precompute some stuff to make the loop faster + my = axisy.c2p(mouseY), + maxx = maxDistance / axisx.scale, + maxy = maxDistance / axisy.scale; + + ps = s.datapoints.pointsize; + // with inverse transforms, we can't use the maxx/maxy + // optimization, sadly + if (axisx.options.inverseTransform) + maxx = Number.MAX_VALUE; + if (axisy.options.inverseTransform) + maxy = Number.MAX_VALUE; + + if (s.lines.show || s.points.show) { + for (j = 0; j < points.length; j += ps) { + var x = points[j], y = points[j + 1]; + if (x == null) + continue; + + // For points and lines, the cursor must be within a + // certain distance to the data point + if (x - mx > maxx || x - mx < -maxx || + y - my > maxy || y - my < -maxy) + continue; + + // We have to calculate distances in pixels, not in + // data units, because the scales of 
the axes may be different + var dx = Math.abs(axisx.p2c(x) - mouseX), + dy = Math.abs(axisy.p2c(y) - mouseY), + dist = dx * dx + dy * dy; // we save the sqrt + + // use <= to ensure last point takes precedence + // (last generally means on top of) + if (dist < smallestDistance) { + smallestDistance = dist; + item = [i, j / ps]; + } + } + } + + if (s.bars.show && !item) { // no other point can be nearby + + var barLeft, barRight; + + switch (s.bars.align) { + case "left": + barLeft = 0; + break; + case "right": + barLeft = -s.bars.barWidth; + break; + default: + barLeft = -s.bars.barWidth / 2; + } + + barRight = barLeft + s.bars.barWidth; + + for (j = 0; j < points.length; j += ps) { + var x = points[j], y = points[j + 1], b = points[j + 2]; + if (x == null) + continue; + + // for a bar graph, the cursor must be inside the bar + if (series[i].bars.horizontal ? + (mx <= Math.max(b, x) && mx >= Math.min(b, x) && + my >= y + barLeft && my <= y + barRight) : + (mx >= x + barLeft && mx <= x + barRight && + my >= Math.min(b, y) && my <= Math.max(b, y))) + item = [i, j / ps]; + } + } + } + + if (item) { + i = item[0]; + j = item[1]; + ps = series[i].datapoints.pointsize; + + return { datapoint: series[i].datapoints.points.slice(j * ps, (j + 1) * ps), + dataIndex: j, + series: series[i], + seriesIndex: i }; + } + + return null; + } + + function onMouseMove(e) { + if (options.grid.hoverable) + triggerClickHoverEvent("plothover", e, + function (s) { return s["hoverable"] != false; }); + } + + function onMouseLeave(e) { + if (options.grid.hoverable) + triggerClickHoverEvent("plothover", e, + function (s) { return false; }); + } + + function onClick(e) { + triggerClickHoverEvent("plotclick", e, + function (s) { return s["clickable"] != false; }); + } + + // trigger click or hover event (they send the same parameters + // so we share their code) + function triggerClickHoverEvent(eventname, event, seriesFilter) { + var offset = eventHolder.offset(), + canvasX = event.pageX - offset.left - plotOffset.left, + canvasY = event.pageY - offset.top - plotOffset.top, + pos = canvasToAxisCoords({ left: canvasX, top: canvasY }); + + pos.pageX = event.pageX; + pos.pageY = event.pageY; + + var item = findNearbyItem(canvasX, canvasY, seriesFilter); + + if (item) { + // fill in mouse pos for any listeners out there + item.pageX = parseInt(item.series.xaxis.p2c(item.datapoint[0]) + offset.left + plotOffset.left, 10); + item.pageY = parseInt(item.series.yaxis.p2c(item.datapoint[1]) + offset.top + plotOffset.top, 10); + } + + if (options.grid.autoHighlight) { + // clear auto-highlights + for (var i = 0; i < highlights.length; ++i) { + var h = highlights[i]; + if (h.auto == eventname && + !(item && h.series == item.series && + h.point[0] == item.datapoint[0] && + h.point[1] == item.datapoint[1])) + unhighlight(h.series, h.point); + } + + if (item) + highlight(item.series, item.datapoint, eventname); + } + + placeholder.trigger(eventname, [ pos, item ]); + } + + function triggerRedrawOverlay() { + var t = options.interaction.redrawOverlayInterval; + if (t == -1) { // skip event queue + drawOverlay(); + return; + } + + if (!redrawTimeout) + redrawTimeout = setTimeout(drawOverlay, t); + } + + function drawOverlay() { + redrawTimeout = null; + + // draw highlights + octx.save(); + overlay.clear(); + octx.translate(plotOffset.left, plotOffset.top); + + var i, hi; + for (i = 0; i < highlights.length; ++i) { + hi = highlights[i]; + + if (hi.series.bars.show) + drawBarHighlight(hi.series, hi.point); + else + 
drawPointHighlight(hi.series, hi.point); + } + octx.restore(); + + executeHooks(hooks.drawOverlay, [octx]); + } + + function highlight(s, point, auto) { + if (typeof s == "number") + s = series[s]; + + if (typeof point == "number") { + var ps = s.datapoints.pointsize; + point = s.datapoints.points.slice(ps * point, ps * (point + 1)); + } + + var i = indexOfHighlight(s, point); + if (i == -1) { + highlights.push({ series: s, point: point, auto: auto }); + + triggerRedrawOverlay(); + } + else if (!auto) + highlights[i].auto = false; + } + + function unhighlight(s, point) { + if (s == null && point == null) { + highlights = []; + triggerRedrawOverlay(); + return; + } + + if (typeof s == "number") + s = series[s]; + + if (typeof point == "number") { + var ps = s.datapoints.pointsize; + point = s.datapoints.points.slice(ps * point, ps * (point + 1)); + } + + var i = indexOfHighlight(s, point); + if (i != -1) { + highlights.splice(i, 1); + + triggerRedrawOverlay(); + } + } + + function indexOfHighlight(s, p) { + for (var i = 0; i < highlights.length; ++i) { + var h = highlights[i]; + if (h.series == s && h.point[0] == p[0] + && h.point[1] == p[1]) + return i; + } + return -1; + } + + function drawPointHighlight(series, point) { + var x = point[0], y = point[1], + axisx = series.xaxis, axisy = series.yaxis, + highlightColor = (typeof series.highlightColor === "string") ? series.highlightColor : $.color.parse(series.color).scale('a', 0.5).toString(); + + if (x < axisx.min || x > axisx.max || y < axisy.min || y > axisy.max) + return; + + var pointRadius = series.points.radius + series.points.lineWidth / 2; + octx.lineWidth = pointRadius; + octx.strokeStyle = highlightColor; + var radius = 1.5 * pointRadius; + x = axisx.p2c(x); + y = axisy.p2c(y); + + octx.beginPath(); + if (series.points.symbol == "circle") + octx.arc(x, y, radius, 0, 2 * Math.PI, false); + else + series.points.symbol(octx, x, y, radius, false); + octx.closePath(); + octx.stroke(); + } + + function drawBarHighlight(series, point) { + var highlightColor = (typeof series.highlightColor === "string") ? 
series.highlightColor : $.color.parse(series.color).scale('a', 0.5).toString(), + fillStyle = highlightColor, + barLeft; + + switch (series.bars.align) { + case "left": + barLeft = 0; + break; + case "right": + barLeft = -series.bars.barWidth; + break; + default: + barLeft = -series.bars.barWidth / 2; + } + + octx.lineWidth = series.bars.lineWidth; + octx.strokeStyle = highlightColor; + + drawBar(point[0], point[1], point[2] || 0, barLeft, barLeft + series.bars.barWidth, + function () { return fillStyle; }, series.xaxis, series.yaxis, octx, series.bars.horizontal, series.bars.lineWidth); + } + + function getColorOrGradient(spec, bottom, top, defaultColor) { + if (typeof spec == "string") + return spec; + else { + // assume this is a gradient spec; IE currently only + // supports a simple vertical gradient properly, so that's + // what we support too + var gradient = ctx.createLinearGradient(0, top, 0, bottom); + + for (var i = 0, l = spec.colors.length; i < l; ++i) { + var c = spec.colors[i]; + if (typeof c != "string") { + var co = $.color.parse(defaultColor); + if (c.brightness != null) + co = co.scale('rgb', c.brightness); + if (c.opacity != null) + co.a *= c.opacity; + c = co.toString(); + } + gradient.addColorStop(i / (l - 1), c); + } + + return gradient; + } + } + } + + // Add the plot function to the top level of the jQuery object + + $.plot = function(placeholder, data, options) { + //var t0 = new Date(); + var plot = new Plot($(placeholder), data, options, $.plot.plugins); + //(window.console ? console.log : alert)("time used (msecs): " + ((new Date()).getTime() - t0.getTime())); + return plot; + }; + + $.plot.version = "0.8.3"; + + $.plot.plugins = []; + + // Also add the plot function as a chainable property + + $.fn.plot = function(data, options) { + return this.each(function() { + $.plot(this, data, options); + }); + }; + + // round to nearby lower multiple of base + function floorInBase(n, base) { + return base * Math.floor(n / base); + } + +})(jQuery); diff --git a/qa/workunits/erasure-code/jquery.js b/qa/workunits/erasure-code/jquery.js new file mode 100644 index 000000000..8c24ffc61 --- /dev/null +++ b/qa/workunits/erasure-code/jquery.js @@ -0,0 +1,9472 @@ +/*! 
+ * jQuery JavaScript Library v1.8.3 + * http://jquery.com/ + * + * Includes Sizzle.js + * http://sizzlejs.com/ + * + * Copyright 2012 jQuery Foundation and other contributors + * Released under the MIT license + * http://jquery.org/license + * + * Date: Tue Nov 13 2012 08:20:33 GMT-0500 (Eastern Standard Time) + */ +(function( window, undefined ) { +var + // A central reference to the root jQuery(document) + rootjQuery, + + // The deferred used on DOM ready + readyList, + + // Use the correct document accordingly with window argument (sandbox) + document = window.document, + location = window.location, + navigator = window.navigator, + + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + + // Map over the $ in case of overwrite + _$ = window.$, + + // Save a reference to some core methods + core_push = Array.prototype.push, + core_slice = Array.prototype.slice, + core_indexOf = Array.prototype.indexOf, + core_toString = Object.prototype.toString, + core_hasOwn = Object.prototype.hasOwnProperty, + core_trim = String.prototype.trim, + + // Define a local copy of jQuery + jQuery = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context, rootjQuery ); + }, + + // Used for matching numbers + core_pnum = /[\-+]?(?:\d*\.|)\d+(?:[eE][\-+]?\d+|)/.source, + + // Used for detecting and trimming whitespace + core_rnotwhite = /\S/, + core_rspace = /\s+/, + + // Make sure we trim BOM and NBSP (here's looking at you, Safari 5.0 and IE) + rtrim = /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g, + + // A simple way to check for HTML strings + // Prioritize #id over <tag> to avoid XSS via location.hash (#9521) + rquickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/, + + // Match a standalone tag + rsingleTag = /^<(\w+)\s*\/?>(?:<\/\1>|)$/, + + // JSON RegExp + rvalidchars = /^[\],:{}\s]*$/, + rvalidbraces = /(?:^|:|,)(?:\s*\[)+/g, + rvalidescape = /\\(?:["\\\/bfnrt]|u[\da-fA-F]{4})/g, + rvalidtokens = /"[^"\\\r\n]*"|true|false|null|-?(?:\d\d*\.|)\d+(?:[eE][\-+]?\d+|)/g, + + // Matches dashed string for camelizing + rmsPrefix = /^-ms-/, + rdashAlpha = /-([\da-z])/gi, + + // Used by jQuery.camelCase as callback to replace() + fcamelCase = function( all, letter ) { + return ( letter + "" ).toUpperCase(); + }, + + // The ready event handler and self cleanup method + DOMContentLoaded = function() { + if ( document.addEventListener ) { + document.removeEventListener( "DOMContentLoaded", DOMContentLoaded, false ); + jQuery.ready(); + } else if ( document.readyState === "complete" ) { + // we're here because readyState === "complete" in oldIE + // which is good enough for us to call the dom ready! 
+ document.detachEvent( "onreadystatechange", DOMContentLoaded ); + jQuery.ready(); + } + }, + + // [[Class]] -> type pairs + class2type = {}; + +jQuery.fn = jQuery.prototype = { + constructor: jQuery, + init: function( selector, context, rootjQuery ) { + var match, elem, ret, doc; + + // Handle $(""), $(null), $(undefined), $(false) + if ( !selector ) { + return this; + } + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this.context = this[0] = selector; + this.length = 1; + return this; + } + + // Handle HTML strings + if ( typeof selector === "string" ) { + if ( selector.charAt(0) === "<" && selector.charAt( selector.length - 1 ) === ">" && selector.length >= 3 ) { + // Assume that strings that start and end with <> are HTML and skip the regex check + match = [ null, selector, null ]; + + } else { + match = rquickExpr.exec( selector ); + } + + // Match html or make sure no context is specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) { + context = context instanceof jQuery ? context[0] : context; + doc = ( context && context.nodeType ? context.ownerDocument || context : document ); + + // scripts is true for back-compat + selector = jQuery.parseHTML( match[1], doc, true ); + if ( rsingleTag.test( match[1] ) && jQuery.isPlainObject( context ) ) { + this.attr.call( selector, context, true ); + } + + return jQuery.merge( this, selector ); + + // HANDLE: $(#id) + } else { + elem = document.getElementById( match[2] ); + + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + if ( elem && elem.parentNode ) { + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem.id !== match[2] ) { + return rootjQuery.find( selector ); + } + + // Otherwise, we inject the element directly into the jQuery object + this.length = 1; + this[0] = elem; + } + + this.context = document; + this.selector = selector; + return this; + } + + // HANDLE: $(expr, $(...)) + } else if ( !context || context.jquery ) { + return ( context || rootjQuery ).find( selector ); + + // HANDLE: $(expr, context) + // (which is just equivalent to: $(context).find(expr) + } else { + return this.constructor( context ).find( selector ); + } + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) { + return rootjQuery.ready( selector ); + } + + if ( selector.selector !== undefined ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return jQuery.makeArray( selector, this ); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.8.3", + + // The default length of a jQuery object is 0 + length: 0, + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + toArray: function() { + return core_slice.call( this ); + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num == null ? + + // Return a 'clean' array + this.toArray() : + + // Return just the object + ( num < 0 ? 
this[ this.length + num ] : this[ num ] ); + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + + // Build a new jQuery matched element set + var ret = jQuery.merge( this.constructor(), elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) { + ret.selector = this.selector + ( this.selector ? " " : "" ) + selector; + } else if ( name ) { + ret.selector = this.selector + "." + name + "(" + selector + ")"; + } + + // Return the newly-formed element set + return ret; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + ready: function( fn ) { + // Add the callback + jQuery.ready.promise().done( fn ); + + return this; + }, + + eq: function( i ) { + i = +i; + return i === -1 ? + this.slice( i ) : + this.slice( i, i + 1 ); + }, + + first: function() { + return this.eq( 0 ); + }, + + last: function() { + return this.eq( -1 ); + }, + + slice: function() { + return this.pushStack( core_slice.apply( this, arguments ), + "slice", core_slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function( elem, i ) { + return callback.call( elem, i, elem ); + })); + }, + + end: function() { + return this.prevObject || this.constructor(null); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: core_push, + sort: [].sort, + splice: [].splice +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +jQuery.extend = jQuery.fn.extend = function() { + var options, name, src, copy, copyIsArray, clone, + target = arguments[0] || {}, + i = 1, + length = arguments.length, + deep = false; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) { + target = {}; + } + + // extend jQuery itself if only one argument is passed + if ( length === i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) { + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) { + // Extend the base object + for ( name in options ) { + src = target[ name ]; + copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) { + continue; + } + + // Recurse if we're merging plain objects or arrays + if ( deep && copy && ( jQuery.isPlainObject(copy) || (copyIsArray = jQuery.isArray(copy)) ) ) { + if ( copyIsArray ) { + copyIsArray = false; + clone = src && jQuery.isArray(src) ? src : []; + + } else { + clone = src && jQuery.isPlainObject(src) ? 
src : {}; + } + + // Never move original objects, clone them + target[ name ] = jQuery.extend( deep, clone, copy ); + + // Don't bring in undefined values + } else if ( copy !== undefined ) { + target[ name ] = copy; + } + } + } + } + + // Return the modified object + return target; +}; + +jQuery.extend({ + noConflict: function( deep ) { + if ( window.$ === jQuery ) { + window.$ = _$; + } + + if ( deep && window.jQuery === jQuery ) { + window.jQuery = _jQuery; + } + + return jQuery; + }, + + // Is the DOM ready to be used? Set to true once it occurs. + isReady: false, + + // A counter to track how many items to wait for before + // the ready event fires. See #6781 + readyWait: 1, + + // Hold (or release) the ready event + holdReady: function( hold ) { + if ( hold ) { + jQuery.readyWait++; + } else { + jQuery.ready( true ); + } + }, + + // Handle when the DOM is ready + ready: function( wait ) { + + // Abort if there are pending holds or we're already ready + if ( wait === true ? --jQuery.readyWait : jQuery.isReady ) { + return; + } + + // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443). + if ( !document.body ) { + return setTimeout( jQuery.ready, 1 ); + } + + // Remember that the DOM is ready + jQuery.isReady = true; + + // If a normal DOM Ready event fired, decrement, and wait if need be + if ( wait !== true && --jQuery.readyWait > 0 ) { + return; + } + + // If there are functions bound, to execute + readyList.resolveWith( document, [ jQuery ] ); + + // Trigger any bound ready events + if ( jQuery.fn.trigger ) { + jQuery( document ).trigger("ready").off("ready"); + } + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return jQuery.type(obj) === "function"; + }, + + isArray: Array.isArray || function( obj ) { + return jQuery.type(obj) === "array"; + }, + + isWindow: function( obj ) { + return obj != null && obj == obj.window; + }, + + isNumeric: function( obj ) { + return !isNaN( parseFloat(obj) ) && isFinite( obj ); + }, + + type: function( obj ) { + return obj == null ? + String( obj ) : + class2type[ core_toString.call(obj) ] || "object"; + }, + + isPlainObject: function( obj ) { + // Must be an Object. + // Because of IE, we also have to check the presence of the constructor property. + // Make sure that DOM nodes and window objects don't pass through, as well + if ( !obj || jQuery.type(obj) !== "object" || obj.nodeType || jQuery.isWindow( obj ) ) { + return false; + } + + try { + // Not own constructor property must be Object + if ( obj.constructor && + !core_hasOwn.call(obj, "constructor") && + !core_hasOwn.call(obj.constructor.prototype, "isPrototypeOf") ) { + return false; + } + } catch ( e ) { + // IE8,9 Will throw exceptions on certain host objects #9897 + return false; + } + + // Own properties are enumerated firstly, so to speed up, + // if last one is own, then all properties are own. 
+ + var key; + for ( key in obj ) {} + + return key === undefined || core_hasOwn.call( obj, key ); + }, + + isEmptyObject: function( obj ) { + var name; + for ( name in obj ) { + return false; + } + return true; + }, + + error: function( msg ) { + throw new Error( msg ); + }, + + // data: string of html + // context (optional): If specified, the fragment will be created in this context, defaults to document + // scripts (optional): If true, will include scripts passed in the html string + parseHTML: function( data, context, scripts ) { + var parsed; + if ( !data || typeof data !== "string" ) { + return null; + } + if ( typeof context === "boolean" ) { + scripts = context; + context = 0; + } + context = context || document; + + // Single tag + if ( (parsed = rsingleTag.exec( data )) ) { + return [ context.createElement( parsed[1] ) ]; + } + + parsed = jQuery.buildFragment( [ data ], context, scripts ? null : [] ); + return jQuery.merge( [], + (parsed.cacheable ? jQuery.clone( parsed.fragment ) : parsed.fragment).childNodes ); + }, + + parseJSON: function( data ) { + if ( !data || typeof data !== "string") { + return null; + } + + // Make sure leading/trailing whitespace is removed (IE can't handle it) + data = jQuery.trim( data ); + + // Attempt to parse using the native JSON parser first + if ( window.JSON && window.JSON.parse ) { + return window.JSON.parse( data ); + } + + // Make sure the incoming data is actual JSON + // Logic borrowed from http://json.org/json2.js + if ( rvalidchars.test( data.replace( rvalidescape, "@" ) + .replace( rvalidtokens, "]" ) + .replace( rvalidbraces, "")) ) { + + return ( new Function( "return " + data ) )(); + + } + jQuery.error( "Invalid JSON: " + data ); + }, + + // Cross-browser xml parsing + parseXML: function( data ) { + var xml, tmp; + if ( !data || typeof data !== "string" ) { + return null; + } + try { + if ( window.DOMParser ) { // Standard + tmp = new DOMParser(); + xml = tmp.parseFromString( data , "text/xml" ); + } else { // IE + xml = new ActiveXObject( "Microsoft.XMLDOM" ); + xml.async = "false"; + xml.loadXML( data ); + } + } catch( e ) { + xml = undefined; + } + if ( !xml || !xml.documentElement || xml.getElementsByTagName( "parsererror" ).length ) { + jQuery.error( "Invalid XML: " + data ); + } + return xml; + }, + + noop: function() {}, + + // Evaluates a script in a global context + // Workarounds based on findings by Jim Driscoll + // http://weblogs.java.net/blog/driscoll/archive/2009/09/08/eval-javascript-global-context + globalEval: function( data ) { + if ( data && core_rnotwhite.test( data ) ) { + // We use execScript on Internet Explorer + // We use an anonymous function so that context is window + // rather than jQuery in Firefox + ( window.execScript || function( data ) { + window[ "eval" ].call( window, data ); + } )( data ); + } + }, + + // Convert dashed to camelCase; used by the css and data modules + // Microsoft forgot to hump their vendor prefix (#9572) + camelCase: function( string ) { + return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toLowerCase() === name.toLowerCase(); + }, + + // args is for internal usage only + each: function( obj, callback, args ) { + var name, + i = 0, + length = obj.length, + isObj = length === undefined || jQuery.isFunction( obj ); + + if ( args ) { + if ( isObj ) { + for ( name in obj ) { + if ( callback.apply( obj[ name ], args ) === false ) { + break; + } + } + } else { + for ( ; i 
< length; ) { + if ( callback.apply( obj[ i++ ], args ) === false ) { + break; + } + } + } + + // A special, fast, case for the most common use of each + } else { + if ( isObj ) { + for ( name in obj ) { + if ( callback.call( obj[ name ], name, obj[ name ] ) === false ) { + break; + } + } + } else { + for ( ; i < length; ) { + if ( callback.call( obj[ i ], i, obj[ i++ ] ) === false ) { + break; + } + } + } + } + + return obj; + }, + + // Use native String.trim function wherever possible + trim: core_trim && !core_trim.call("\uFEFF\xA0") ? + function( text ) { + return text == null ? + "" : + core_trim.call( text ); + } : + + // Otherwise use our own trimming functionality + function( text ) { + return text == null ? + "" : + ( text + "" ).replace( rtrim, "" ); + }, + + // results is for internal usage only + makeArray: function( arr, results ) { + var type, + ret = results || []; + + if ( arr != null ) { + // The window, strings (and functions) also have 'length' + // Tweaked logic slightly to handle Blackberry 4.7 RegExp issues #6930 + type = jQuery.type( arr ); + + if ( arr.length == null || type === "string" || type === "function" || type === "regexp" || jQuery.isWindow( arr ) ) { + core_push.call( ret, arr ); + } else { + jQuery.merge( ret, arr ); + } + } + + return ret; + }, + + inArray: function( elem, arr, i ) { + var len; + + if ( arr ) { + if ( core_indexOf ) { + return core_indexOf.call( arr, elem, i ); + } + + len = arr.length; + i = i ? i < 0 ? Math.max( 0, len + i ) : i : 0; + + for ( ; i < len; i++ ) { + // Skip accessing in sparse arrays + if ( i in arr && arr[ i ] === elem ) { + return i; + } + } + } + + return -1; + }, + + merge: function( first, second ) { + var l = second.length, + i = first.length, + j = 0; + + if ( typeof l === "number" ) { + for ( ; j < l; j++ ) { + first[ i++ ] = second[ j ]; + } + + } else { + while ( second[j] !== undefined ) { + first[ i++ ] = second[ j++ ]; + } + } + + first.length = i; + + return first; + }, + + grep: function( elems, callback, inv ) { + var retVal, + ret = [], + i = 0, + length = elems.length; + inv = !!inv; + + // Go through the array, only saving the items + // that pass the validator function + for ( ; i < length; i++ ) { + retVal = !!callback( elems[ i ], i ); + if ( inv !== retVal ) { + ret.push( elems[ i ] ); + } + } + + return ret; + }, + + // arg is for internal usage only + map: function( elems, callback, arg ) { + var value, key, + ret = [], + i = 0, + length = elems.length, + // jquery objects are treated as arrays + isArray = elems instanceof jQuery || length !== undefined && typeof length === "number" && ( ( length > 0 && elems[ 0 ] && elems[ length -1 ] ) || length === 0 || jQuery.isArray( elems ) ) ; + + // Go through the array, translating each of the items to their + if ( isArray ) { + for ( ; i < length; i++ ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret[ ret.length ] = value; + } + } + + // Go through every key on the object, + } else { + for ( key in elems ) { + value = callback( elems[ key ], key, arg ); + + if ( value != null ) { + ret[ ret.length ] = value; + } + } + } + + // Flatten any nested arrays + return ret.concat.apply( [], ret ); + }, + + // A global GUID counter for objects + guid: 1, + + // Bind a function to a context, optionally partially applying any + // arguments. 
+ proxy: function( fn, context ) { + var tmp, args, proxy; + + if ( typeof context === "string" ) { + tmp = fn[ context ]; + context = fn; + fn = tmp; + } + + // Quick check to determine if target is callable, in the spec + // this throws a TypeError, but we will just return undefined. + if ( !jQuery.isFunction( fn ) ) { + return undefined; + } + + // Simulated bind + args = core_slice.call( arguments, 2 ); + proxy = function() { + return fn.apply( context, args.concat( core_slice.call( arguments ) ) ); + }; + + // Set the guid of unique handler to the same of original handler, so it can be removed + proxy.guid = fn.guid = fn.guid || jQuery.guid++; + + return proxy; + }, + + // Multifunctional method to get and set values of a collection + // The value/s can optionally be executed if it's a function + access: function( elems, fn, key, value, chainable, emptyGet, pass ) { + var exec, + bulk = key == null, + i = 0, + length = elems.length; + + // Sets many values + if ( key && typeof key === "object" ) { + for ( i in key ) { + jQuery.access( elems, fn, i, key[i], 1, emptyGet, value ); + } + chainable = 1; + + // Sets one value + } else if ( value !== undefined ) { + // Optionally, function values get executed if exec is true + exec = pass === undefined && jQuery.isFunction( value ); + + if ( bulk ) { + // Bulk operations only iterate when executing function values + if ( exec ) { + exec = fn; + fn = function( elem, key, value ) { + return exec.call( jQuery( elem ), value ); + }; + + // Otherwise they run against the entire set + } else { + fn.call( elems, value ); + fn = null; + } + } + + if ( fn ) { + for (; i < length; i++ ) { + fn( elems[i], key, exec ? value.call( elems[i], i, fn( elems[i], key ) ) : value, pass ); + } + } + + chainable = 1; + } + + return chainable ? + elems : + + // Gets + bulk ? + fn.call( elems ) : + length ? fn( elems[0], key ) : emptyGet; + }, + + now: function() { + return ( new Date() ).getTime(); + } +}); + +jQuery.ready.promise = function( obj ) { + if ( !readyList ) { + + readyList = jQuery.Deferred(); + + // Catch cases where $(document).ready() is called after the browser event has already occurred. 
+ // we once tried to use readyState "interactive" here, but it caused issues like the one + // discovered by ChrisS here: http://bugs.jquery.com/ticket/12282#comment:15 + if ( document.readyState === "complete" ) { + // Handle it asynchronously to allow scripts the opportunity to delay ready + setTimeout( jQuery.ready, 1 ); + + // Standards-based browsers support DOMContentLoaded + } else if ( document.addEventListener ) { + // Use the handy event callback + document.addEventListener( "DOMContentLoaded", DOMContentLoaded, false ); + + // A fallback to window.onload, that will always work + window.addEventListener( "load", jQuery.ready, false ); + + // If IE event model is used + } else { + // Ensure firing before onload, maybe late but safe also for iframes + document.attachEvent( "onreadystatechange", DOMContentLoaded ); + + // A fallback to window.onload, that will always work + window.attachEvent( "onload", jQuery.ready ); + + // If IE and not a frame + // continually check to see if the document is ready + var top = false; + + try { + top = window.frameElement == null && document.documentElement; + } catch(e) {} + + if ( top && top.doScroll ) { + (function doScrollCheck() { + if ( !jQuery.isReady ) { + + try { + // Use the trick by Diego Perini + // http://javascript.nwbox.com/IEContentLoaded/ + top.doScroll("left"); + } catch(e) { + return setTimeout( doScrollCheck, 50 ); + } + + // and execute any waiting functions + jQuery.ready(); + } + })(); + } + } + } + return readyList.promise( obj ); +}; + +// Populate the class2type map +jQuery.each("Boolean Number String Function Array Date RegExp Object".split(" "), function(i, name) { + class2type[ "[object " + name + "]" ] = name.toLowerCase(); +}); + +// All jQuery objects should point back to these +rootjQuery = jQuery(document); +// String to Object options format cache +var optionsCache = {}; + +// Convert String-formatted options into Object-formatted ones and store in cache +function createOptions( options ) { + var object = optionsCache[ options ] = {}; + jQuery.each( options.split( core_rspace ), function( _, flag ) { + object[ flag ] = true; + }); + return object; +} + +/* + * Create a callback list using the following parameters: + * + * options: an optional list of space-separated options that will change how + * the callback list behaves or a more traditional option object + * + * By default a callback list will act like an event callback list and can be + * "fired" multiple times. + * + * Possible options: + * + * once: will ensure the callback list can only be fired once (like a Deferred) + * + * memory: will keep track of previous values and will call any callback added + * after the list has been fired right away with the latest "memorized" + * values (like a Deferred) + * + * unique: will ensure a callback can only be added once (no duplicate in the list) + * + * stopOnFalse: interrupt callings when a callback returns false + * + */ +jQuery.Callbacks = function( options ) { + + // Convert options from String-formatted to Object-formatted if needed + // (we check in cache first) + options = typeof options === "string" ? 
+ ( optionsCache[ options ] || createOptions( options ) ) : + jQuery.extend( {}, options ); + + var // Last fire value (for non-forgettable lists) + memory, + // Flag to know if list was already fired + fired, + // Flag to know if list is currently firing + firing, + // First callback to fire (used internally by add and fireWith) + firingStart, + // End of the loop when firing + firingLength, + // Index of currently firing callback (modified by remove if needed) + firingIndex, + // Actual callback list + list = [], + // Stack of fire calls for repeatable lists + stack = !options.once && [], + // Fire callbacks + fire = function( data ) { + memory = options.memory && data; + fired = true; + firingIndex = firingStart || 0; + firingStart = 0; + firingLength = list.length; + firing = true; + for ( ; list && firingIndex < firingLength; firingIndex++ ) { + if ( list[ firingIndex ].apply( data[ 0 ], data[ 1 ] ) === false && options.stopOnFalse ) { + memory = false; // To prevent further calls using add + break; + } + } + firing = false; + if ( list ) { + if ( stack ) { + if ( stack.length ) { + fire( stack.shift() ); + } + } else if ( memory ) { + list = []; + } else { + self.disable(); + } + } + }, + // Actual Callbacks object + self = { + // Add a callback or a collection of callbacks to the list + add: function() { + if ( list ) { + // First, we save the current length + var start = list.length; + (function add( args ) { + jQuery.each( args, function( _, arg ) { + var type = jQuery.type( arg ); + if ( type === "function" ) { + if ( !options.unique || !self.has( arg ) ) { + list.push( arg ); + } + } else if ( arg && arg.length && type !== "string" ) { + // Inspect recursively + add( arg ); + } + }); + })( arguments ); + // Do we need to add the callbacks to the + // current firing batch? + if ( firing ) { + firingLength = list.length; + // With memory, if we're not firing then + // we should call right away + } else if ( memory ) { + firingStart = start; + fire( memory ); + } + } + return this; + }, + // Remove a callback from the list + remove: function() { + if ( list ) { + jQuery.each( arguments, function( _, arg ) { + var index; + while( ( index = jQuery.inArray( arg, list, index ) ) > -1 ) { + list.splice( index, 1 ); + // Handle firing indexes + if ( firing ) { + if ( index <= firingLength ) { + firingLength--; + } + if ( index <= firingIndex ) { + firingIndex--; + } + } + } + }); + } + return this; + }, + // Control if a given callback is in the list + has: function( fn ) { + return jQuery.inArray( fn, list ) > -1; + }, + // Remove all callbacks from the list + empty: function() { + list = []; + return this; + }, + // Have the list do nothing anymore + disable: function() { + list = stack = memory = undefined; + return this; + }, + // Is it disabled? + disabled: function() { + return !list; + }, + // Lock the list in its current state + lock: function() { + stack = undefined; + if ( !memory ) { + self.disable(); + } + return this; + }, + // Is it locked? + locked: function() { + return !stack; + }, + // Call all callbacks with the given context and arguments + fireWith: function( context, args ) { + args = args || []; + args = [ context, args.slice ? 
args.slice() : args ]; + if ( list && ( !fired || stack ) ) { + if ( firing ) { + stack.push( args ); + } else { + fire( args ); + } + } + return this; + }, + // Call all the callbacks with the given arguments + fire: function() { + self.fireWith( this, arguments ); + return this; + }, + // To know if the callbacks have already been called at least once + fired: function() { + return !!fired; + } + }; + + return self; +}; +jQuery.extend({ + + Deferred: function( func ) { + var tuples = [ + // action, add listener, listener list, final state + [ "resolve", "done", jQuery.Callbacks("once memory"), "resolved" ], + [ "reject", "fail", jQuery.Callbacks("once memory"), "rejected" ], + [ "notify", "progress", jQuery.Callbacks("memory") ] + ], + state = "pending", + promise = { + state: function() { + return state; + }, + always: function() { + deferred.done( arguments ).fail( arguments ); + return this; + }, + then: function( /* fnDone, fnFail, fnProgress */ ) { + var fns = arguments; + return jQuery.Deferred(function( newDefer ) { + jQuery.each( tuples, function( i, tuple ) { + var action = tuple[ 0 ], + fn = fns[ i ]; + // deferred[ done | fail | progress ] for forwarding actions to newDefer + deferred[ tuple[1] ]( jQuery.isFunction( fn ) ? + function() { + var returned = fn.apply( this, arguments ); + if ( returned && jQuery.isFunction( returned.promise ) ) { + returned.promise() + .done( newDefer.resolve ) + .fail( newDefer.reject ) + .progress( newDefer.notify ); + } else { + newDefer[ action + "With" ]( this === deferred ? newDefer : this, [ returned ] ); + } + } : + newDefer[ action ] + ); + }); + fns = null; + }).promise(); + }, + // Get a promise for this deferred + // If obj is provided, the promise aspect is added to the object + promise: function( obj ) { + return obj != null ? jQuery.extend( obj, promise ) : promise; + } + }, + deferred = {}; + + // Keep pipe for back-compat + promise.pipe = promise.then; + + // Add list-specific methods + jQuery.each( tuples, function( i, tuple ) { + var list = tuple[ 2 ], + stateString = tuple[ 3 ]; + + // promise[ done | fail | progress ] = list.add + promise[ tuple[1] ] = list.add; + + // Handle state + if ( stateString ) { + list.add(function() { + // state = [ resolved | rejected ] + state = stateString; + + // [ reject_list | resolve_list ].disable; progress_list.lock + }, tuples[ i ^ 1 ][ 2 ].disable, tuples[ 2 ][ 2 ].lock ); + } + + // deferred[ resolve | reject | notify ] = list.fire + deferred[ tuple[0] ] = list.fire; + deferred[ tuple[0] + "With" ] = list.fireWith; + }); + + // Make the deferred a promise + promise.promise( deferred ); + + // Call given func if any + if ( func ) { + func.call( deferred, deferred ); + } + + // All done! + return deferred; + }, + + // Deferred helper + when: function( subordinate /* , ..., subordinateN */ ) { + var i = 0, + resolveValues = core_slice.call( arguments ), + length = resolveValues.length, + + // the count of uncompleted subordinates + remaining = length !== 1 || ( subordinate && jQuery.isFunction( subordinate.promise ) ) ? length : 0, + + // the master Deferred. If resolveValues consist of only a single Deferred, just use that. + deferred = remaining === 1 ? subordinate : jQuery.Deferred(), + + // Update function for both resolve and progress values + updateFunc = function( i, contexts, values ) { + return function( value ) { + contexts[ i ] = this; + values[ i ] = arguments.length > 1 ? 
core_slice.call( arguments ) : value; + if( values === progressValues ) { + deferred.notifyWith( contexts, values ); + } else if ( !( --remaining ) ) { + deferred.resolveWith( contexts, values ); + } + }; + }, + + progressValues, progressContexts, resolveContexts; + + // add listeners to Deferred subordinates; treat others as resolved + if ( length > 1 ) { + progressValues = new Array( length ); + progressContexts = new Array( length ); + resolveContexts = new Array( length ); + for ( ; i < length; i++ ) { + if ( resolveValues[ i ] && jQuery.isFunction( resolveValues[ i ].promise ) ) { + resolveValues[ i ].promise() + .done( updateFunc( i, resolveContexts, resolveValues ) ) + .fail( deferred.reject ) + .progress( updateFunc( i, progressContexts, progressValues ) ); + } else { + --remaining; + } + } + } + + // if we're not waiting on anything, resolve the master + if ( !remaining ) { + deferred.resolveWith( resolveContexts, resolveValues ); + } + + return deferred.promise(); + } +}); +jQuery.support = (function() { + + var support, + all, + a, + select, + opt, + input, + fragment, + eventName, + i, + isSupported, + clickFn, + div = document.createElement("div"); + + // Setup + div.setAttribute( "className", "t" ); + div.innerHTML = " <link/><table></table><a href='/a'>a</a><input type='checkbox'/>"; + + // Support tests won't run in some limited or non-browser environments + all = div.getElementsByTagName("*"); + a = div.getElementsByTagName("a")[ 0 ]; + if ( !all || !a || !all.length ) { + return {}; + } + + // First batch of tests + select = document.createElement("select"); + opt = select.appendChild( document.createElement("option") ); + input = div.getElementsByTagName("input")[ 0 ]; + + a.style.cssText = "top:1px;float:left;opacity:.5"; + support = { + // IE strips leading whitespace when .innerHTML is used + leadingWhitespace: ( div.firstChild.nodeType === 3 ), + + // Make sure that tbody elements aren't automatically inserted + // IE will insert them into empty tables + tbody: !div.getElementsByTagName("tbody").length, + + // Make sure that link elements get serialized correctly by innerHTML + // This requires a wrapper element in IE + htmlSerialize: !!div.getElementsByTagName("link").length, + + // Get the style information from getAttribute + // (IE uses .cssText instead) + style: /top/.test( a.getAttribute("style") ), + + // Make sure that URLs aren't manipulated + // (IE normalizes it by default) + hrefNormalized: ( a.getAttribute("href") === "/a" ), + + // Make sure that element opacity exists + // (IE uses filter instead) + // Use a regex to work around a WebKit issue. See #5145 + opacity: /^0.5/.test( a.style.opacity ), + + // Verify style float existence + // (IE uses styleFloat instead of cssFloat) + cssFloat: !!a.style.cssFloat, + + // Make sure that if no value is specified for a checkbox + // that it defaults to "on". + // (WebKit defaults to "" instead) + checkOn: ( input.value === "on" ), + + // Make sure that a selected-by-default option has a working selected property. + // (WebKit defaults to false instead of true, IE too, if it's in an optgroup) + optSelected: opt.selected, + + // Test setAttribute on camelCase class. 
If it works, we need attrFixes when doing get/setAttribute (ie6/7) + getSetAttribute: div.className !== "t", + + // Tests for enctype support on a form (#6743) + enctype: !!document.createElement("form").enctype, + + // Makes sure cloning an html5 element does not cause problems + // Where outerHTML is undefined, this still works + html5Clone: document.createElement("nav").cloneNode( true ).outerHTML !== "<:nav></:nav>", + + // jQuery.support.boxModel DEPRECATED in 1.8 since we don't support Quirks Mode + boxModel: ( document.compatMode === "CSS1Compat" ), + + // Will be defined later + submitBubbles: true, + changeBubbles: true, + focusinBubbles: false, + deleteExpando: true, + noCloneEvent: true, + inlineBlockNeedsLayout: false, + shrinkWrapBlocks: false, + reliableMarginRight: true, + boxSizingReliable: true, + pixelPosition: false + }; + + // Make sure checked status is properly cloned + input.checked = true; + support.noCloneChecked = input.cloneNode( true ).checked; + + // Make sure that the options inside disabled selects aren't marked as disabled + // (WebKit marks them as disabled) + select.disabled = true; + support.optDisabled = !opt.disabled; + + // Test to see if it's possible to delete an expando from an element + // Fails in Internet Explorer + try { + delete div.test; + } catch( e ) { + support.deleteExpando = false; + } + + if ( !div.addEventListener && div.attachEvent && div.fireEvent ) { + div.attachEvent( "onclick", clickFn = function() { + // Cloning a node shouldn't copy over any + // bound event handlers (IE does this) + support.noCloneEvent = false; + }); + div.cloneNode( true ).fireEvent("onclick"); + div.detachEvent( "onclick", clickFn ); + } + + // Check if a radio maintains its value + // after being appended to the DOM + input = document.createElement("input"); + input.value = "t"; + input.setAttribute( "type", "radio" ); + support.radioValue = input.value === "t"; + + input.setAttribute( "checked", "checked" ); + + // #11217 - WebKit loses check when the name is after the checked attribute + input.setAttribute( "name", "t" ); + + div.appendChild( input ); + fragment = document.createDocumentFragment(); + fragment.appendChild( div.lastChild ); + + // WebKit doesn't clone checked state correctly in fragments + support.checkClone = fragment.cloneNode( true ).cloneNode( true ).lastChild.checked; + + // Check if a disconnected checkbox will retain its checked + // value of true after appended to the DOM (IE6/7) + support.appendChecked = input.checked; + + fragment.removeChild( input ); + fragment.appendChild( div ); + + // Technique from Juriy Zaytsev + // http://perfectionkills.com/detecting-event-support-without-browser-sniffing/ + // We only care about the case where non-standard event systems + // are used, namely in IE. Short-circuiting here helps us to + // avoid an eval call (in setAttribute) which can cause CSP + // to go haywire. 
See: https://developer.mozilla.org/en/Security/CSP + if ( div.attachEvent ) { + for ( i in { + submit: true, + change: true, + focusin: true + }) { + eventName = "on" + i; + isSupported = ( eventName in div ); + if ( !isSupported ) { + div.setAttribute( eventName, "return;" ); + isSupported = ( typeof div[ eventName ] === "function" ); + } + support[ i + "Bubbles" ] = isSupported; + } + } + + // Run tests that need a body at doc ready + jQuery(function() { + var container, div, tds, marginDiv, + divReset = "padding:0;margin:0;border:0;display:block;overflow:hidden;", + body = document.getElementsByTagName("body")[0]; + + if ( !body ) { + // Return for frameset docs that don't have a body + return; + } + + container = document.createElement("div"); + container.style.cssText = "visibility:hidden;border:0;width:0;height:0;position:static;top:0;margin-top:1px"; + body.insertBefore( container, body.firstChild ); + + // Construct the test element + div = document.createElement("div"); + container.appendChild( div ); + + // Check if table cells still have offsetWidth/Height when they are set + // to display:none and there are still other visible table cells in a + // table row; if so, offsetWidth/Height are not reliable for use when + // determining if an element has been hidden directly using + // display:none (it is still safe to use offsets if a parent element is + // hidden; don safety goggles and see bug #4512 for more information). + // (only IE 8 fails this test) + div.innerHTML = "<table><tr><td></td><td>t</td></tr></table>"; + tds = div.getElementsByTagName("td"); + tds[ 0 ].style.cssText = "padding:0;margin:0;border:0;display:none"; + isSupported = ( tds[ 0 ].offsetHeight === 0 ); + + tds[ 0 ].style.display = ""; + tds[ 1 ].style.display = "none"; + + // Check if empty table cells still have offsetWidth/Height + // (IE <= 8 fail this test) + support.reliableHiddenOffsets = isSupported && ( tds[ 0 ].offsetHeight === 0 ); + + // Check box-sizing and margin behavior + div.innerHTML = ""; + div.style.cssText = "box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;padding:1px;border:1px;display:block;width:4px;margin-top:1%;position:absolute;top:1%;"; + support.boxSizing = ( div.offsetWidth === 4 ); + support.doesNotIncludeMarginInBodyOffset = ( body.offsetTop !== 1 ); + + // NOTE: To any future maintainer, we've window.getComputedStyle + // because jsdom on node.js will break without it. + if ( window.getComputedStyle ) { + support.pixelPosition = ( window.getComputedStyle( div, null ) || {} ).top !== "1%"; + support.boxSizingReliable = ( window.getComputedStyle( div, null ) || { width: "4px" } ).width === "4px"; + + // Check if div with explicit width and no margin-right incorrectly + // gets computed margin-right based on width of container. 
For more + // info see bug #3333 + // Fails in WebKit before Feb 2011 nightlies + // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right + marginDiv = document.createElement("div"); + marginDiv.style.cssText = div.style.cssText = divReset; + marginDiv.style.marginRight = marginDiv.style.width = "0"; + div.style.width = "1px"; + div.appendChild( marginDiv ); + support.reliableMarginRight = + !parseFloat( ( window.getComputedStyle( marginDiv, null ) || {} ).marginRight ); + } + + if ( typeof div.style.zoom !== "undefined" ) { + // Check if natively block-level elements act like inline-block + // elements when setting their display to 'inline' and giving + // them layout + // (IE < 8 does this) + div.innerHTML = ""; + div.style.cssText = divReset + "width:1px;padding:1px;display:inline;zoom:1"; + support.inlineBlockNeedsLayout = ( div.offsetWidth === 3 ); + + // Check if elements with layout shrink-wrap their children + // (IE 6 does this) + div.style.display = "block"; + div.style.overflow = "visible"; + div.innerHTML = "<div></div>"; + div.firstChild.style.width = "5px"; + support.shrinkWrapBlocks = ( div.offsetWidth !== 3 ); + + container.style.zoom = 1; + } + + // Null elements to avoid leaks in IE + body.removeChild( container ); + container = div = tds = marginDiv = null; + }); + + // Null elements to avoid leaks in IE + fragment.removeChild( div ); + all = a = select = opt = input = fragment = div = null; + + return support; +})(); +var rbrace = /(?:\{[\s\S]*\}|\[[\s\S]*\])$/, + rmultiDash = /([A-Z])/g; + +jQuery.extend({ + cache: {}, + + deletedIds: [], + + // Remove at next major release (1.9/2.0) + uuid: 0, + + // Unique for each copy of jQuery on the page + // Non-digits removed to match rinlinejQuery + expando: "jQuery" + ( jQuery.fn.jquery + Math.random() ).replace( /\D/g, "" ), + + // The following elements throw uncatchable exceptions if you + // attempt to add expando properties to them. + noData: { + "embed": true, + // Ban all objects except for Flash (which handle expandos) + "object": "clsid:D27CDB6E-AE6D-11cf-96B8-444553540000", + "applet": true + }, + + hasData: function( elem ) { + elem = elem.nodeType ? jQuery.cache[ elem[jQuery.expando] ] : elem[ jQuery.expando ]; + return !!elem && !isEmptyDataObject( elem ); + }, + + data: function( elem, name, data, pvt /* Internal Use Only */ ) { + if ( !jQuery.acceptData( elem ) ) { + return; + } + + var thisCache, ret, + internalKey = jQuery.expando, + getByName = typeof name === "string", + + // We have to handle DOM nodes and JS objects differently because IE6-7 + // can't GC object references properly across the DOM-JS boundary + isNode = elem.nodeType, + + // Only DOM nodes need the global jQuery cache; JS object data is + // attached directly to the object so GC can occur automatically + cache = isNode ? jQuery.cache : elem, + + // Only defining an ID for JS objects if its cache already exists allows + // the code to shortcut on the same path as a DOM node with no cache + id = isNode ? 
elem[ internalKey ] : elem[ internalKey ] && internalKey; + + // Avoid doing any more work than we need to when trying to get data on an + // object that has no data at all + if ( (!id || !cache[id] || (!pvt && !cache[id].data)) && getByName && data === undefined ) { + return; + } + + if ( !id ) { + // Only DOM nodes need a new unique ID for each element since their data + // ends up in the global cache + if ( isNode ) { + elem[ internalKey ] = id = jQuery.deletedIds.pop() || jQuery.guid++; + } else { + id = internalKey; + } + } + + if ( !cache[ id ] ) { + cache[ id ] = {}; + + // Avoids exposing jQuery metadata on plain JS objects when the object + // is serialized using JSON.stringify + if ( !isNode ) { + cache[ id ].toJSON = jQuery.noop; + } + } + + // An object can be passed to jQuery.data instead of a key/value pair; this gets + // shallow copied over onto the existing cache + if ( typeof name === "object" || typeof name === "function" ) { + if ( pvt ) { + cache[ id ] = jQuery.extend( cache[ id ], name ); + } else { + cache[ id ].data = jQuery.extend( cache[ id ].data, name ); + } + } + + thisCache = cache[ id ]; + + // jQuery data() is stored in a separate object inside the object's internal data + // cache in order to avoid key collisions between internal data and user-defined + // data. + if ( !pvt ) { + if ( !thisCache.data ) { + thisCache.data = {}; + } + + thisCache = thisCache.data; + } + + if ( data !== undefined ) { + thisCache[ jQuery.camelCase( name ) ] = data; + } + + // Check for both converted-to-camel and non-converted data property names + // If a data property was specified + if ( getByName ) { + + // First Try to find as-is property data + ret = thisCache[ name ]; + + // Test for null|undefined property data + if ( ret == null ) { + + // Try to find the camelCased property + ret = thisCache[ jQuery.camelCase( name ) ]; + } + } else { + ret = thisCache; + } + + return ret; + }, + + removeData: function( elem, name, pvt /* Internal Use Only */ ) { + if ( !jQuery.acceptData( elem ) ) { + return; + } + + var thisCache, i, l, + + isNode = elem.nodeType, + + // See jQuery.data for more information + cache = isNode ? jQuery.cache : elem, + id = isNode ? elem[ jQuery.expando ] : jQuery.expando; + + // If there is already no cache entry for this object, there is no + // purpose in continuing + if ( !cache[ id ] ) { + return; + } + + if ( name ) { + + thisCache = pvt ? cache[ id ] : cache[ id ].data; + + if ( thisCache ) { + + // Support array or space separated string names for data keys + if ( !jQuery.isArray( name ) ) { + + // try the string as a key before any manipulation + if ( name in thisCache ) { + name = [ name ]; + } else { + + // split the camel cased version by spaces unless a key with the spaces exists + name = jQuery.camelCase( name ); + if ( name in thisCache ) { + name = [ name ]; + } else { + name = name.split(" "); + } + } + } + + for ( i = 0, l = name.length; i < l; i++ ) { + delete thisCache[ name[i] ]; + } + + // If there is no data left in the cache, we want to continue + // and let the cache object itself get destroyed + if ( !( pvt ? 
isEmptyDataObject : jQuery.isEmptyObject )( thisCache ) ) { + return; + } + } + } + + // See jQuery.data for more information + if ( !pvt ) { + delete cache[ id ].data; + + // Don't destroy the parent cache unless the internal data object + // had been the only thing left in it + if ( !isEmptyDataObject( cache[ id ] ) ) { + return; + } + } + + // Destroy the cache + if ( isNode ) { + jQuery.cleanData( [ elem ], true ); + + // Use delete when supported for expandos or `cache` is not a window per isWindow (#10080) + } else if ( jQuery.support.deleteExpando || cache != cache.window ) { + delete cache[ id ]; + + // When all else fails, null + } else { + cache[ id ] = null; + } + }, + + // For internal use only. + _data: function( elem, name, data ) { + return jQuery.data( elem, name, data, true ); + }, + + // A method for determining if a DOM node can handle the data expando + acceptData: function( elem ) { + var noData = elem.nodeName && jQuery.noData[ elem.nodeName.toLowerCase() ]; + + // nodes accept data unless otherwise specified; rejection can be conditional + return !noData || noData !== true && elem.getAttribute("classid") === noData; + } +}); + +jQuery.fn.extend({ + data: function( key, value ) { + var parts, part, attr, name, l, + elem = this[0], + i = 0, + data = null; + + // Gets all values + if ( key === undefined ) { + if ( this.length ) { + data = jQuery.data( elem ); + + if ( elem.nodeType === 1 && !jQuery._data( elem, "parsedAttrs" ) ) { + attr = elem.attributes; + for ( l = attr.length; i < l; i++ ) { + name = attr[i].name; + + if ( !name.indexOf( "data-" ) ) { + name = jQuery.camelCase( name.substring(5) ); + + dataAttr( elem, name, data[ name ] ); + } + } + jQuery._data( elem, "parsedAttrs", true ); + } + } + + return data; + } + + // Sets multiple values + if ( typeof key === "object" ) { + return this.each(function() { + jQuery.data( this, key ); + }); + } + + parts = key.split( ".", 2 ); + parts[1] = parts[1] ? "." + parts[1] : ""; + part = parts[1] + "!"; + + return jQuery.access( this, function( value ) { + + if ( value === undefined ) { + data = this.triggerHandler( "getData" + part, [ parts[0] ] ); + + // Try to fetch any internally stored data first + if ( data === undefined && elem ) { + data = jQuery.data( elem, key ); + data = dataAttr( elem, key, data ); + } + + return data === undefined && parts[1] ? + this.data( parts[0] ) : + data; + } + + parts[1] = value; + this.each(function() { + var self = jQuery( this ); + + self.triggerHandler( "setData" + part, parts ); + jQuery.data( this, key, value ); + self.triggerHandler( "changeData" + part, parts ); + }); + }, null, value, arguments.length > 1, null, false ); + }, + + removeData: function( key ) { + return this.each(function() { + jQuery.removeData( this, key ); + }); + } +}); + +function dataAttr( elem, key, data ) { + // If nothing was found internally, try to fetch any + // data from the HTML5 data-* attribute + if ( data === undefined && elem.nodeType === 1 ) { + + var name = "data-" + key.replace( rmultiDash, "-$1" ).toLowerCase(); + + data = elem.getAttribute( name ); + + if ( typeof data === "string" ) { + try { + data = data === "true" ? true : + data === "false" ? false : + data === "null" ? null : + // Only convert to a number if it doesn't change the string + +data + "" === data ? +data : + rbrace.test( data ) ? 
jQuery.parseJSON( data ) : + data; + } catch( e ) {} + + // Make sure we set the data so it isn't changed later + jQuery.data( elem, key, data ); + + } else { + data = undefined; + } + } + + return data; +} + +// checks a cache object for emptiness +function isEmptyDataObject( obj ) { + var name; + for ( name in obj ) { + + // if the public data object is empty, the private is still empty + if ( name === "data" && jQuery.isEmptyObject( obj[name] ) ) { + continue; + } + if ( name !== "toJSON" ) { + return false; + } + } + + return true; +} +jQuery.extend({ + queue: function( elem, type, data ) { + var queue; + + if ( elem ) { + type = ( type || "fx" ) + "queue"; + queue = jQuery._data( elem, type ); + + // Speed up dequeue by getting out quickly if this is just a lookup + if ( data ) { + if ( !queue || jQuery.isArray(data) ) { + queue = jQuery._data( elem, type, jQuery.makeArray(data) ); + } else { + queue.push( data ); + } + } + return queue || []; + } + }, + + dequeue: function( elem, type ) { + type = type || "fx"; + + var queue = jQuery.queue( elem, type ), + startLength = queue.length, + fn = queue.shift(), + hooks = jQuery._queueHooks( elem, type ), + next = function() { + jQuery.dequeue( elem, type ); + }; + + // If the fx queue is dequeued, always remove the progress sentinel + if ( fn === "inprogress" ) { + fn = queue.shift(); + startLength--; + } + + if ( fn ) { + + // Add a progress sentinel to prevent the fx queue from being + // automatically dequeued + if ( type === "fx" ) { + queue.unshift( "inprogress" ); + } + + // clear up the last queue stop function + delete hooks.stop; + fn.call( elem, next, hooks ); + } + + if ( !startLength && hooks ) { + hooks.empty.fire(); + } + }, + + // not intended for public consumption - generates a queueHooks object, or returns the current one + _queueHooks: function( elem, type ) { + var key = type + "queueHooks"; + return jQuery._data( elem, key ) || jQuery._data( elem, key, { + empty: jQuery.Callbacks("once memory").add(function() { + jQuery.removeData( elem, type + "queue", true ); + jQuery.removeData( elem, key, true ); + }) + }); + } +}); + +jQuery.fn.extend({ + queue: function( type, data ) { + var setter = 2; + + if ( typeof type !== "string" ) { + data = type; + type = "fx"; + setter--; + } + + if ( arguments.length < setter ) { + return jQuery.queue( this[0], type ); + } + + return data === undefined ? + this : + this.each(function() { + var queue = jQuery.queue( this, type, data ); + + // ensure a hooks for this queue + jQuery._queueHooks( this, type ); + + if ( type === "fx" && queue[0] !== "inprogress" ) { + jQuery.dequeue( this, type ); + } + }); + }, + dequeue: function( type ) { + return this.each(function() { + jQuery.dequeue( this, type ); + }); + }, + // Based off of the plugin by Clint Helfers, with permission. + // http://blindsignals.com/index.php/2009/07/jquery-delay/ + delay: function( time, type ) { + time = jQuery.fx ? 
jQuery.fx.speeds[ time ] || time : time; + type = type || "fx"; + + return this.queue( type, function( next, hooks ) { + var timeout = setTimeout( next, time ); + hooks.stop = function() { + clearTimeout( timeout ); + }; + }); + }, + clearQueue: function( type ) { + return this.queue( type || "fx", [] ); + }, + // Get a promise resolved when queues of a certain type + // are emptied (fx is the type by default) + promise: function( type, obj ) { + var tmp, + count = 1, + defer = jQuery.Deferred(), + elements = this, + i = this.length, + resolve = function() { + if ( !( --count ) ) { + defer.resolveWith( elements, [ elements ] ); + } + }; + + if ( typeof type !== "string" ) { + obj = type; + type = undefined; + } + type = type || "fx"; + + while( i-- ) { + tmp = jQuery._data( elements[ i ], type + "queueHooks" ); + if ( tmp && tmp.empty ) { + count++; + tmp.empty.add( resolve ); + } + } + resolve(); + return defer.promise( obj ); + } +}); +var nodeHook, boolHook, fixSpecified, + rclass = /[\t\r\n]/g, + rreturn = /\r/g, + rtype = /^(?:button|input)$/i, + rfocusable = /^(?:button|input|object|select|textarea)$/i, + rclickable = /^a(?:rea|)$/i, + rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i, + getSetAttribute = jQuery.support.getSetAttribute; + +jQuery.fn.extend({ + attr: function( name, value ) { + return jQuery.access( this, jQuery.attr, name, value, arguments.length > 1 ); + }, + + removeAttr: function( name ) { + return this.each(function() { + jQuery.removeAttr( this, name ); + }); + }, + + prop: function( name, value ) { + return jQuery.access( this, jQuery.prop, name, value, arguments.length > 1 ); + }, + + removeProp: function( name ) { + name = jQuery.propFix[ name ] || name; + return this.each(function() { + // try/catch handles cases where IE balks (such as removing a property on window) + try { + this[ name ] = undefined; + delete this[ name ]; + } catch( e ) {} + }); + }, + + addClass: function( value ) { + var classNames, i, l, elem, + setClass, c, cl; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( j ) { + jQuery( this ).addClass( value.call(this, j, this.className) ); + }); + } + + if ( value && typeof value === "string" ) { + classNames = value.split( core_rspace ); + + for ( i = 0, l = this.length; i < l; i++ ) { + elem = this[ i ]; + + if ( elem.nodeType === 1 ) { + if ( !elem.className && classNames.length === 1 ) { + elem.className = value; + + } else { + setClass = " " + elem.className + " "; + + for ( c = 0, cl = classNames.length; c < cl; c++ ) { + if ( setClass.indexOf( " " + classNames[ c ] + " " ) < 0 ) { + setClass += classNames[ c ] + " "; + } + } + elem.className = jQuery.trim( setClass ); + } + } + } + } + + return this; + }, + + removeClass: function( value ) { + var removes, className, elem, c, cl, i, l; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( j ) { + jQuery( this ).removeClass( value.call(this, j, this.className) ); + }); + } + if ( (value && typeof value === "string") || value === undefined ) { + removes = ( value || "" ).split( core_rspace ); + + for ( i = 0, l = this.length; i < l; i++ ) { + elem = this[ i ]; + if ( elem.nodeType === 1 && elem.className ) { + + className = (" " + elem.className + " ").replace( rclass, " " ); + + // loop over each item in the removal list + for ( c = 0, cl = removes.length; c < cl; c++ ) { + // Remove until there is nothing to remove, + while ( className.indexOf(" " + removes[ c ] + " 
") >= 0 ) { + className = className.replace( " " + removes[ c ] + " " , " " ); + } + } + elem.className = value ? jQuery.trim( className ) : ""; + } + } + } + + return this; + }, + + toggleClass: function( value, stateVal ) { + var type = typeof value, + isBool = typeof stateVal === "boolean"; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( i ) { + jQuery( this ).toggleClass( value.call(this, i, this.className, stateVal), stateVal ); + }); + } + + return this.each(function() { + if ( type === "string" ) { + // toggle individual class names + var className, + i = 0, + self = jQuery( this ), + state = stateVal, + classNames = value.split( core_rspace ); + + while ( (className = classNames[ i++ ]) ) { + // check each className given, space separated list + state = isBool ? state : !self.hasClass( className ); + self[ state ? "addClass" : "removeClass" ]( className ); + } + + } else if ( type === "undefined" || type === "boolean" ) { + if ( this.className ) { + // store className if set + jQuery._data( this, "__className__", this.className ); + } + + // toggle whole className + this.className = this.className || value === false ? "" : jQuery._data( this, "__className__" ) || ""; + } + }); + }, + + hasClass: function( selector ) { + var className = " " + selector + " ", + i = 0, + l = this.length; + for ( ; i < l; i++ ) { + if ( this[i].nodeType === 1 && (" " + this[i].className + " ").replace(rclass, " ").indexOf( className ) >= 0 ) { + return true; + } + } + + return false; + }, + + val: function( value ) { + var hooks, ret, isFunction, + elem = this[0]; + + if ( !arguments.length ) { + if ( elem ) { + hooks = jQuery.valHooks[ elem.type ] || jQuery.valHooks[ elem.nodeName.toLowerCase() ]; + + if ( hooks && "get" in hooks && (ret = hooks.get( elem, "value" )) !== undefined ) { + return ret; + } + + ret = elem.value; + + return typeof ret === "string" ? + // handle most common string cases + ret.replace(rreturn, "") : + // handle cases where value is null/undef or number + ret == null ? "" : ret; + } + + return; + } + + isFunction = jQuery.isFunction( value ); + + return this.each(function( i ) { + var val, + self = jQuery(this); + + if ( this.nodeType !== 1 ) { + return; + } + + if ( isFunction ) { + val = value.call( this, i, self.val() ); + } else { + val = value; + } + + // Treat null/undefined as ""; convert numbers to string + if ( val == null ) { + val = ""; + } else if ( typeof val === "number" ) { + val += ""; + } else if ( jQuery.isArray( val ) ) { + val = jQuery.map(val, function ( value ) { + return value == null ? "" : value + ""; + }); + } + + hooks = jQuery.valHooks[ this.type ] || jQuery.valHooks[ this.nodeName.toLowerCase() ]; + + // If set returns undefined, fall back to normal setting + if ( !hooks || !("set" in hooks) || hooks.set( this, val, "value" ) === undefined ) { + this.value = val; + } + }); + } +}); + +jQuery.extend({ + valHooks: { + option: { + get: function( elem ) { + // attributes.value is undefined in Blackberry 4.7 but + // uses .value. See #6932 + var val = elem.attributes.value; + return !val || val.specified ? elem.value : elem.text; + } + }, + select: { + get: function( elem ) { + var value, option, + options = elem.options, + index = elem.selectedIndex, + one = elem.type === "select-one" || index < 0, + values = one ? null : [], + max = one ? index + 1 : options.length, + i = index < 0 ? + max : + one ? 
index : 0; + + // Loop through all the selected options + for ( ; i < max; i++ ) { + option = options[ i ]; + + // oldIE doesn't update selected after form reset (#2551) + if ( ( option.selected || i === index ) && + // Don't return options that are disabled or in a disabled optgroup + ( jQuery.support.optDisabled ? !option.disabled : option.getAttribute("disabled") === null ) && + ( !option.parentNode.disabled || !jQuery.nodeName( option.parentNode, "optgroup" ) ) ) { + + // Get the specific value for the option + value = jQuery( option ).val(); + + // We don't need an array for one selects + if ( one ) { + return value; + } + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + }, + + set: function( elem, value ) { + var values = jQuery.makeArray( value ); + + jQuery(elem).find("option").each(function() { + this.selected = jQuery.inArray( jQuery(this).val(), values ) >= 0; + }); + + if ( !values.length ) { + elem.selectedIndex = -1; + } + return values; + } + } + }, + + // Unused in 1.8, left in so attrFn-stabbers won't die; remove in 1.9 + attrFn: {}, + + attr: function( elem, name, value, pass ) { + var ret, hooks, notxml, + nType = elem.nodeType; + + // don't get/set attributes on text, comment and attribute nodes + if ( !elem || nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + if ( pass && jQuery.isFunction( jQuery.fn[ name ] ) ) { + return jQuery( elem )[ name ]( value ); + } + + // Fallback to prop when attributes are not supported + if ( typeof elem.getAttribute === "undefined" ) { + return jQuery.prop( elem, name, value ); + } + + notxml = nType !== 1 || !jQuery.isXMLDoc( elem ); + + // All attributes are lowercase + // Grab necessary hook if one is defined + if ( notxml ) { + name = name.toLowerCase(); + hooks = jQuery.attrHooks[ name ] || ( rboolean.test( name ) ? boolHook : nodeHook ); + } + + if ( value !== undefined ) { + + if ( value === null ) { + jQuery.removeAttr( elem, name ); + return; + + } else if ( hooks && "set" in hooks && notxml && (ret = hooks.set( elem, value, name )) !== undefined ) { + return ret; + + } else { + elem.setAttribute( name, value + "" ); + return value; + } + + } else if ( hooks && "get" in hooks && notxml && (ret = hooks.get( elem, name )) !== null ) { + return ret; + + } else { + + ret = elem.getAttribute( name ); + + // Non-existent attributes return null, we normalize to undefined + return ret === null ? + undefined : + ret; + } + }, + + removeAttr: function( elem, value ) { + var propName, attrNames, name, isBool, + i = 0; + + if ( value && elem.nodeType === 1 ) { + + attrNames = value.split( core_rspace ); + + for ( ; i < attrNames.length; i++ ) { + name = attrNames[ i ]; + + if ( name ) { + propName = jQuery.propFix[ name ] || name; + isBool = rboolean.test( name ); + + // See #9699 for explanation of this approach (setting first, then removal) + // Do not do this for boolean attributes (see #10870) + if ( !isBool ) { + jQuery.attr( elem, name, "" ); + } + elem.removeAttribute( getSetAttribute ? 
name : propName ); + + // Set corresponding property to false for boolean attributes + if ( isBool && propName in elem ) { + elem[ propName ] = false; + } + } + } + } + }, + + attrHooks: { + type: { + set: function( elem, value ) { + // We can't allow the type property to be changed (since it causes problems in IE) + if ( rtype.test( elem.nodeName ) && elem.parentNode ) { + jQuery.error( "type property can't be changed" ); + } else if ( !jQuery.support.radioValue && value === "radio" && jQuery.nodeName(elem, "input") ) { + // Setting the type on a radio button after the value resets the value in IE6-9 + // Reset value to it's default in case type is set after value + // This is for element creation + var val = elem.value; + elem.setAttribute( "type", value ); + if ( val ) { + elem.value = val; + } + return value; + } + } + }, + // Use the value property for back compat + // Use the nodeHook for button elements in IE6/7 (#1954) + value: { + get: function( elem, name ) { + if ( nodeHook && jQuery.nodeName( elem, "button" ) ) { + return nodeHook.get( elem, name ); + } + return name in elem ? + elem.value : + null; + }, + set: function( elem, value, name ) { + if ( nodeHook && jQuery.nodeName( elem, "button" ) ) { + return nodeHook.set( elem, value, name ); + } + // Does not return so that setAttribute is also used + elem.value = value; + } + } + }, + + propFix: { + tabindex: "tabIndex", + readonly: "readOnly", + "for": "htmlFor", + "class": "className", + maxlength: "maxLength", + cellspacing: "cellSpacing", + cellpadding: "cellPadding", + rowspan: "rowSpan", + colspan: "colSpan", + usemap: "useMap", + frameborder: "frameBorder", + contenteditable: "contentEditable" + }, + + prop: function( elem, name, value ) { + var ret, hooks, notxml, + nType = elem.nodeType; + + // don't get/set properties on text, comment and attribute nodes + if ( !elem || nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + notxml = nType !== 1 || !jQuery.isXMLDoc( elem ); + + if ( notxml ) { + // Fix name and attach hooks + name = jQuery.propFix[ name ] || name; + hooks = jQuery.propHooks[ name ]; + } + + if ( value !== undefined ) { + if ( hooks && "set" in hooks && (ret = hooks.set( elem, value, name )) !== undefined ) { + return ret; + + } else { + return ( elem[ name ] = value ); + } + + } else { + if ( hooks && "get" in hooks && (ret = hooks.get( elem, name )) !== null ) { + return ret; + + } else { + return elem[ name ]; + } + } + }, + + propHooks: { + tabIndex: { + get: function( elem ) { + // elem.tabIndex doesn't always return the correct value when it hasn't been explicitly set + // http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/ + var attributeNode = elem.getAttributeNode("tabindex"); + + return attributeNode && attributeNode.specified ? + parseInt( attributeNode.value, 10 ) : + rfocusable.test( elem.nodeName ) || rclickable.test( elem.nodeName ) && elem.href ? + 0 : + undefined; + } + } + } +}); + +// Hook for boolean attributes +boolHook = { + get: function( elem, name ) { + // Align boolean attributes with corresponding properties + // Fall back to attribute presence where some booleans are not supported + var attrNode, + property = jQuery.prop( elem, name ); + return property === true || typeof property !== "boolean" && ( attrNode = elem.getAttributeNode(name) ) && attrNode.nodeValue !== false ? 
+ name.toLowerCase() : + undefined; + }, + set: function( elem, value, name ) { + var propName; + if ( value === false ) { + // Remove boolean attributes when set to false + jQuery.removeAttr( elem, name ); + } else { + // value is true since we know at this point it's type boolean and not false + // Set boolean attributes to the same name and set the DOM property + propName = jQuery.propFix[ name ] || name; + if ( propName in elem ) { + // Only set the IDL specifically if it already exists on the element + elem[ propName ] = true; + } + + elem.setAttribute( name, name.toLowerCase() ); + } + return name; + } +}; + +// IE6/7 do not support getting/setting some attributes with get/setAttribute +if ( !getSetAttribute ) { + + fixSpecified = { + name: true, + id: true, + coords: true + }; + + // Use this for any attribute in IE6/7 + // This fixes almost every IE6/7 issue + nodeHook = jQuery.valHooks.button = { + get: function( elem, name ) { + var ret; + ret = elem.getAttributeNode( name ); + return ret && ( fixSpecified[ name ] ? ret.value !== "" : ret.specified ) ? + ret.value : + undefined; + }, + set: function( elem, value, name ) { + // Set the existing or create a new attribute node + var ret = elem.getAttributeNode( name ); + if ( !ret ) { + ret = document.createAttribute( name ); + elem.setAttributeNode( ret ); + } + return ( ret.value = value + "" ); + } + }; + + // Set width and height to auto instead of 0 on empty string( Bug #8150 ) + // This is for removals + jQuery.each([ "width", "height" ], function( i, name ) { + jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], { + set: function( elem, value ) { + if ( value === "" ) { + elem.setAttribute( name, "auto" ); + return value; + } + } + }); + }); + + // Set contenteditable to false on removals(#10429) + // Setting to empty string throws an error as an invalid value + jQuery.attrHooks.contenteditable = { + get: nodeHook.get, + set: function( elem, value, name ) { + if ( value === "" ) { + value = "false"; + } + nodeHook.set( elem, value, name ); + } + }; +} + + +// Some attributes require a special call on IE +if ( !jQuery.support.hrefNormalized ) { + jQuery.each([ "href", "src", "width", "height" ], function( i, name ) { + jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], { + get: function( elem ) { + var ret = elem.getAttribute( name, 2 ); + return ret === null ? 
undefined : ret; + } + }); + }); +} + +if ( !jQuery.support.style ) { + jQuery.attrHooks.style = { + get: function( elem ) { + // Return undefined in the case of empty string + // Normalize to lowercase since IE uppercases css property names + return elem.style.cssText.toLowerCase() || undefined; + }, + set: function( elem, value ) { + return ( elem.style.cssText = value + "" ); + } + }; +} + +// Safari mis-reports the default selected property of an option +// Accessing the parent's selectedIndex property fixes it +if ( !jQuery.support.optSelected ) { + jQuery.propHooks.selected = jQuery.extend( jQuery.propHooks.selected, { + get: function( elem ) { + var parent = elem.parentNode; + + if ( parent ) { + parent.selectedIndex; + + // Make sure that it also works with optgroups, see #5701 + if ( parent.parentNode ) { + parent.parentNode.selectedIndex; + } + } + return null; + } + }); +} + +// IE6/7 call enctype encoding +if ( !jQuery.support.enctype ) { + jQuery.propFix.enctype = "encoding"; +} + +// Radios and checkboxes getter/setter +if ( !jQuery.support.checkOn ) { + jQuery.each([ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = { + get: function( elem ) { + // Handle the case where in Webkit "" is returned instead of "on" if a value isn't specified + return elem.getAttribute("value") === null ? "on" : elem.value; + } + }; + }); +} +jQuery.each([ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = jQuery.extend( jQuery.valHooks[ this ], { + set: function( elem, value ) { + if ( jQuery.isArray( value ) ) { + return ( elem.checked = jQuery.inArray( jQuery(elem).val(), value ) >= 0 ); + } + } + }); +}); +var rformElems = /^(?:textarea|input|select)$/i, + rtypenamespace = /^([^\.]*|)(?:\.(.+)|)$/, + rhoverHack = /(?:^|\s)hover(\.\S+|)\b/, + rkeyEvent = /^key/, + rmouseEvent = /^(?:mouse|contextmenu)|click/, + rfocusMorph = /^(?:focusinfocus|focusoutblur)$/, + hoverHack = function( events ) { + return jQuery.event.special.hover ? events : events.replace( rhoverHack, "mouseenter$1 mouseleave$1" ); + }; + +/* + * Helper functions for managing events -- not part of the public interface. + * Props to Dean Edwards' addEvent library for many of the ideas. + */ +jQuery.event = { + + add: function( elem, types, handler, data, selector ) { + + var elemData, eventHandle, events, + t, tns, type, namespaces, handleObj, + handleObjIn, handlers, special; + + // Don't attach events to noData or text/comment nodes (allow plain objects tho) + if ( elem.nodeType === 3 || elem.nodeType === 8 || !types || !handler || !(elemData = jQuery._data( elem )) ) { + return; + } + + // Caller can pass in an object of custom data in lieu of the handler + if ( handler.handler ) { + handleObjIn = handler; + handler = handleObjIn.handler; + selector = handleObjIn.selector; + } + + // Make sure that the handler has a unique ID, used to find/remove it later + if ( !handler.guid ) { + handler.guid = jQuery.guid++; + } + + // Init the element's event structure and main handler, if this is the first + events = elemData.events; + if ( !events ) { + elemData.events = events = {}; + } + eventHandle = elemData.handle; + if ( !eventHandle ) { + elemData.handle = eventHandle = function( e ) { + // Discard the second event of a jQuery.event.trigger() and + // when an event is called after a page has unloaded + return typeof jQuery !== "undefined" && (!e || jQuery.event.triggered !== e.type) ? 
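+ // Sketch (illustrative, not upstream code): jQuery.event.add() stores handlers
+ // under jQuery._data( elem ).events and binds this single eventHandle per
+ // element, so both calls below end up routed through jQuery.event.dispatch().
+ // "#log" and the handlers are hypothetical.
+ //
+ //     $( "#log" ).on( "click", function () { /* directly bound handler */ } );
+ //     $( "#log" ).on( "click", ".row", function () { /* delegated handler */ } );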
+ jQuery.event.dispatch.apply( eventHandle.elem, arguments ) : + undefined; + }; + // Add elem as a property of the handle fn to prevent a memory leak with IE non-native events + eventHandle.elem = elem; + } + + // Handle multiple events separated by a space + // jQuery(...).bind("mouseover mouseout", fn); + types = jQuery.trim( hoverHack(types) ).split( " " ); + for ( t = 0; t < types.length; t++ ) { + + tns = rtypenamespace.exec( types[t] ) || []; + type = tns[1]; + namespaces = ( tns[2] || "" ).split( "." ).sort(); + + // If event changes its type, use the special event handlers for the changed type + special = jQuery.event.special[ type ] || {}; + + // If selector defined, determine special event api type, otherwise given type + type = ( selector ? special.delegateType : special.bindType ) || type; + + // Update special based on newly reset type + special = jQuery.event.special[ type ] || {}; + + // handleObj is passed to all event handlers + handleObj = jQuery.extend({ + type: type, + origType: tns[1], + data: data, + handler: handler, + guid: handler.guid, + selector: selector, + needsContext: selector && jQuery.expr.match.needsContext.test( selector ), + namespace: namespaces.join(".") + }, handleObjIn ); + + // Init the event handler queue if we're the first + handlers = events[ type ]; + if ( !handlers ) { + handlers = events[ type ] = []; + handlers.delegateCount = 0; + + // Only use addEventListener/attachEvent if the special events handler returns false + if ( !special.setup || special.setup.call( elem, data, namespaces, eventHandle ) === false ) { + // Bind the global event handler to the element + if ( elem.addEventListener ) { + elem.addEventListener( type, eventHandle, false ); + + } else if ( elem.attachEvent ) { + elem.attachEvent( "on" + type, eventHandle ); + } + } + } + + if ( special.add ) { + special.add.call( elem, handleObj ); + + if ( !handleObj.handler.guid ) { + handleObj.handler.guid = handler.guid; + } + } + + // Add to the element's handler list, delegates in front + if ( selector ) { + handlers.splice( handlers.delegateCount++, 0, handleObj ); + } else { + handlers.push( handleObj ); + } + + // Keep track of which events have ever been used, for event optimization + jQuery.event.global[ type ] = true; + } + + // Nullify elem to prevent memory leaks in IE + elem = null; + }, + + global: {}, + + // Detach an event or set of events from an element + remove: function( elem, types, handler, selector, mappedTypes ) { + + var t, tns, type, origType, namespaces, origCount, + j, events, special, eventType, handleObj, + elemData = jQuery.hasData( elem ) && jQuery._data( elem ); + + if ( !elemData || !(events = elemData.events) ) { + return; + } + + // Once for each type.namespace in types; type may be omitted + types = jQuery.trim( hoverHack( types || "" ) ).split(" "); + for ( t = 0; t < types.length; t++ ) { + tns = rtypenamespace.exec( types[t] ) || []; + type = origType = tns[1]; + namespaces = tns[2]; + + // Unbind all events (on this namespace, if provided) for the element + if ( !type ) { + for ( type in events ) { + jQuery.event.remove( elem, type + types[ t ], handler, selector, true ); + } + continue; + } + + special = jQuery.event.special[ type ] || {}; + type = ( selector? special.delegateType : special.bindType ) || type; + eventType = events[ type ] || []; + origCount = eventType.length; + namespaces = namespaces ? 
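+ // Illustrative sketch (not upstream code): the namespace RegExp built here is
+ // what lets a caller unbind only its own handlers. The element, handler and
+ // the "myPlugin" namespace are hypothetical.
+ //
+ //     $( elem ).on( "click.myPlugin keyup.myPlugin", handler );
+ //     $( elem ).off( ".myPlugin" );   // removes both, leaves other handlers alone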
new RegExp("(^|\\.)" + namespaces.split(".").sort().join("\\.(?:.*\\.|)") + "(\\.|$)") : null; + + // Remove matching events + for ( j = 0; j < eventType.length; j++ ) { + handleObj = eventType[ j ]; + + if ( ( mappedTypes || origType === handleObj.origType ) && + ( !handler || handler.guid === handleObj.guid ) && + ( !namespaces || namespaces.test( handleObj.namespace ) ) && + ( !selector || selector === handleObj.selector || selector === "**" && handleObj.selector ) ) { + eventType.splice( j--, 1 ); + + if ( handleObj.selector ) { + eventType.delegateCount--; + } + if ( special.remove ) { + special.remove.call( elem, handleObj ); + } + } + } + + // Remove generic event handler if we removed something and no more handlers exist + // (avoids potential for endless recursion during removal of special event handlers) + if ( eventType.length === 0 && origCount !== eventType.length ) { + if ( !special.teardown || special.teardown.call( elem, namespaces, elemData.handle ) === false ) { + jQuery.removeEvent( elem, type, elemData.handle ); + } + + delete events[ type ]; + } + } + + // Remove the expando if it's no longer used + if ( jQuery.isEmptyObject( events ) ) { + delete elemData.handle; + + // removeData also checks for emptiness and clears the expando if empty + // so use it instead of delete + jQuery.removeData( elem, "events", true ); + } + }, + + // Events that are safe to short-circuit if no handlers are attached. + // Native DOM events should not be added, they may have inline handlers. + customEvent: { + "getData": true, + "setData": true, + "changeData": true + }, + + trigger: function( event, data, elem, onlyHandlers ) { + // Don't do events on text and comment nodes + if ( elem && (elem.nodeType === 3 || elem.nodeType === 8) ) { + return; + } + + // Event object or event type + var cache, exclusive, i, cur, old, ontype, special, handle, eventPath, bubbleType, + type = event.type || event, + namespaces = []; + + // focus/blur morphs to focusin/out; ensure we're not firing them right now + if ( rfocusMorph.test( type + jQuery.event.triggered ) ) { + return; + } + + if ( type.indexOf( "!" ) >= 0 ) { + // Exclusive events trigger only for the exact event (no namespaces) + type = type.slice(0, -1); + exclusive = true; + } + + if ( type.indexOf( "." ) >= 0 ) { + // Namespaced trigger; create a regexp to match event type in handle() + namespaces = type.split("."); + type = namespaces.shift(); + namespaces.sort(); + } + + if ( (!elem || jQuery.event.customEvent[ type ]) && !jQuery.event.global[ type ] ) { + // No jQuery handlers for this event type, and it can't have inline handlers + return; + } + + // Caller can pass in an Event, Object, or just an event type string + event = typeof event === "object" ? + // jQuery.Event object + event[ jQuery.expando ] ? event : + // Object literal + new jQuery.Event( type, event ) : + // Just the event type (string) + new jQuery.Event( type ); + + event.type = type; + event.isTrigger = true; + event.exclusive = exclusive; + event.namespace = namespaces.join( "." ); + event.namespace_re = event.namespace? new RegExp("(^|\\.)" + namespaces.join("\\.(?:.*\\.|)") + "(\\.|$)") : null; + ontype = type.indexOf( ":" ) < 0 ? 
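+ // Sketch (illustrative only): trigger() accepts a bare type, a namespaced type,
+ // or a prebuilt jQuery.Event object; the namespace parsed above restricts which
+ // bound handlers run. "#widget" is hypothetical.
+ //
+ //     $( "#widget" ).trigger( "change" );            // every "change" handler
+ //     $( "#widget" ).trigger( "change.myPlugin" );   // only handlers in that namespace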
"on" + type : ""; + + // Handle a global trigger + if ( !elem ) { + + // TODO: Stop taunting the data cache; remove global events and always attach to document + cache = jQuery.cache; + for ( i in cache ) { + if ( cache[ i ].events && cache[ i ].events[ type ] ) { + jQuery.event.trigger( event, data, cache[ i ].handle.elem, true ); + } + } + return; + } + + // Clean up the event in case it is being reused + event.result = undefined; + if ( !event.target ) { + event.target = elem; + } + + // Clone any incoming data and prepend the event, creating the handler arg list + data = data != null ? jQuery.makeArray( data ) : []; + data.unshift( event ); + + // Allow special events to draw outside the lines + special = jQuery.event.special[ type ] || {}; + if ( special.trigger && special.trigger.apply( elem, data ) === false ) { + return; + } + + // Determine event propagation path in advance, per W3C events spec (#9951) + // Bubble up to document, then to window; watch for a global ownerDocument var (#9724) + eventPath = [[ elem, special.bindType || type ]]; + if ( !onlyHandlers && !special.noBubble && !jQuery.isWindow( elem ) ) { + + bubbleType = special.delegateType || type; + cur = rfocusMorph.test( bubbleType + type ) ? elem : elem.parentNode; + for ( old = elem; cur; cur = cur.parentNode ) { + eventPath.push([ cur, bubbleType ]); + old = cur; + } + + // Only add window if we got to document (e.g., not plain obj or detached DOM) + if ( old === (elem.ownerDocument || document) ) { + eventPath.push([ old.defaultView || old.parentWindow || window, bubbleType ]); + } + } + + // Fire handlers on the event path + for ( i = 0; i < eventPath.length && !event.isPropagationStopped(); i++ ) { + + cur = eventPath[i][0]; + event.type = eventPath[i][1]; + + handle = ( jQuery._data( cur, "events" ) || {} )[ event.type ] && jQuery._data( cur, "handle" ); + if ( handle ) { + handle.apply( cur, data ); + } + // Note that this is a bare JS function and not a jQuery handler + handle = ontype && cur[ ontype ]; + if ( handle && jQuery.acceptData( cur ) && handle.apply && handle.apply( cur, data ) === false ) { + event.preventDefault(); + } + } + event.type = type; + + // If nobody prevented the default action, do it now + if ( !onlyHandlers && !event.isDefaultPrevented() ) { + + if ( (!special._default || special._default.apply( elem.ownerDocument, data ) === false) && + !(type === "click" && jQuery.nodeName( elem, "a" )) && jQuery.acceptData( elem ) ) { + + // Call a native DOM method on the target with the same name name as the event. + // Can't use an .isFunction() check here because IE6/7 fails that test. 
+ // Don't do default actions on window, that's where global variables be (#6170) + // IE<9 dies on focus/blur to hidden element (#1486) + if ( ontype && elem[ type ] && ((type !== "focus" && type !== "blur") || event.target.offsetWidth !== 0) && !jQuery.isWindow( elem ) ) { + + // Don't re-trigger an onFOO event when we call its FOO() method + old = elem[ ontype ]; + + if ( old ) { + elem[ ontype ] = null; + } + + // Prevent re-triggering of the same event, since we already bubbled it above + jQuery.event.triggered = type; + elem[ type ](); + jQuery.event.triggered = undefined; + + if ( old ) { + elem[ ontype ] = old; + } + } + } + } + + return event.result; + }, + + dispatch: function( event ) { + + // Make a writable jQuery.Event from the native event object + event = jQuery.event.fix( event || window.event ); + + var i, j, cur, ret, selMatch, matched, matches, handleObj, sel, related, + handlers = ( (jQuery._data( this, "events" ) || {} )[ event.type ] || []), + delegateCount = handlers.delegateCount, + args = core_slice.call( arguments ), + run_all = !event.exclusive && !event.namespace, + special = jQuery.event.special[ event.type ] || {}, + handlerQueue = []; + + // Use the fix-ed jQuery.Event rather than the (read-only) native event + args[0] = event; + event.delegateTarget = this; + + // Call the preDispatch hook for the mapped type, and let it bail if desired + if ( special.preDispatch && special.preDispatch.call( this, event ) === false ) { + return; + } + + // Determine handlers that should run if there are delegated events + // Avoid non-left-click bubbling in Firefox (#3861) + if ( delegateCount && !(event.button && event.type === "click") ) { + + for ( cur = event.target; cur != this; cur = cur.parentNode || this ) { + + // Don't process clicks (ONLY) on disabled elements (#6911, #8165, #11382, #11764) + if ( cur.disabled !== true || event.type !== "click" ) { + selMatch = {}; + matches = []; + for ( i = 0; i < delegateCount; i++ ) { + handleObj = handlers[ i ]; + sel = handleObj.selector; + + if ( selMatch[ sel ] === undefined ) { + selMatch[ sel ] = handleObj.needsContext ? + jQuery( sel, this ).index( cur ) >= 0 : + jQuery.find( sel, this, null, [ cur ] ).length; + } + if ( selMatch[ sel ] ) { + matches.push( handleObj ); + } + } + if ( matches.length ) { + handlerQueue.push({ elem: cur, matches: matches }); + } + } + } + } + + // Add the remaining (directly-bound) handlers + if ( handlers.length > delegateCount ) { + handlerQueue.push({ elem: this, matches: handlers.slice( delegateCount ) }); + } + + // Run delegates first; they may want to stop propagation beneath us + for ( i = 0; i < handlerQueue.length && !event.isPropagationStopped(); i++ ) { + matched = handlerQueue[ i ]; + event.currentTarget = matched.elem; + + for ( j = 0; j < matched.matches.length && !event.isImmediatePropagationStopped(); j++ ) { + handleObj = matched.matches[ j ]; + + // Triggered event must either 1) be non-exclusive and have no namespace, or + // 2) have namespace(s) a subset or equal to those in the bound event (both can have no namespace). 
+ if ( run_all || (!event.namespace && !handleObj.namespace) || event.namespace_re && event.namespace_re.test( handleObj.namespace ) ) { + + event.data = handleObj.data; + event.handleObj = handleObj; + + ret = ( (jQuery.event.special[ handleObj.origType ] || {}).handle || handleObj.handler ) + .apply( matched.elem, args ); + + if ( ret !== undefined ) { + event.result = ret; + if ( ret === false ) { + event.preventDefault(); + event.stopPropagation(); + } + } + } + } + } + + // Call the postDispatch hook for the mapped type + if ( special.postDispatch ) { + special.postDispatch.call( this, event ); + } + + return event.result; + }, + + // Includes some event props shared by KeyEvent and MouseEvent + // *** attrChange attrName relatedNode srcElement are not normalized, non-W3C, deprecated, will be removed in 1.8 *** + props: "attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "), + + fixHooks: {}, + + keyHooks: { + props: "char charCode key keyCode".split(" "), + filter: function( event, original ) { + + // Add which for key events + if ( event.which == null ) { + event.which = original.charCode != null ? original.charCode : original.keyCode; + } + + return event; + } + }, + + mouseHooks: { + props: "button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(" "), + filter: function( event, original ) { + var eventDoc, doc, body, + button = original.button, + fromElement = original.fromElement; + + // Calculate pageX/Y if missing and clientX/Y available + if ( event.pageX == null && original.clientX != null ) { + eventDoc = event.target.ownerDocument || document; + doc = eventDoc.documentElement; + body = eventDoc.body; + + event.pageX = original.clientX + ( doc && doc.scrollLeft || body && body.scrollLeft || 0 ) - ( doc && doc.clientLeft || body && body.clientLeft || 0 ); + event.pageY = original.clientY + ( doc && doc.scrollTop || body && body.scrollTop || 0 ) - ( doc && doc.clientTop || body && body.clientTop || 0 ); + } + + // Add relatedTarget, if necessary + if ( !event.relatedTarget && fromElement ) { + event.relatedTarget = fromElement === event.target ? original.toElement : fromElement; + } + + // Add which for click: 1 === left; 2 === middle; 3 === right + // Note: button is not normalized, so don't use it + if ( !event.which && button !== undefined ) { + event.which = ( button & 1 ? 1 : ( button & 2 ? 3 : ( button & 4 ? 2 : 0 ) ) ); + } + + return event; + } + }, + + fix: function( event ) { + if ( event[ jQuery.expando ] ) { + return event; + } + + // Create a writable copy of the event object and normalize some properties + var i, prop, + originalEvent = event, + fixHook = jQuery.event.fixHooks[ event.type ] || {}, + copy = fixHook.props ? this.props.concat( fixHook.props ) : this.props; + + event = jQuery.Event( originalEvent ); + + for ( i = copy.length; i; ) { + prop = copy[ --i ]; + event[ prop ] = originalEvent[ prop ]; + } + + // Fix target property, if necessary (#1925, IE 6/7/8 & Safari2) + if ( !event.target ) { + event.target = originalEvent.srcElement || document; + } + + // Target should not be a text node (#504, Safari) + if ( event.target.nodeType === 3 ) { + event.target = event.target.parentNode; + } + + // For mouse/key events, metaKey==false if it's undefined (#3368, #11328; IE6/7/8) + event.metaKey = !!event.metaKey; + + return fixHook.filter? 
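+ // Sketch (illustrative, not upstream code): once fix() and the hooks above have
+ // run, handlers can rely on normalized fields regardless of browser. The
+ // selector is hypothetical.
+ //
+ //     $( document ).on( "mousedown", ".draggable", function( e ) {
+ //         // e.which: 1 = left, 2 = middle, 3 = right (normalized by mouseHooks)
+ //         // e.pageX / e.pageY: derived from clientX/Y plus scroll offsets when missing
+ //     });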
fixHook.filter( event, originalEvent ) : event; + }, + + special: { + load: { + // Prevent triggered image.load events from bubbling to window.load + noBubble: true + }, + + focus: { + delegateType: "focusin" + }, + blur: { + delegateType: "focusout" + }, + + beforeunload: { + setup: function( data, namespaces, eventHandle ) { + // We only want to do this special case on windows + if ( jQuery.isWindow( this ) ) { + this.onbeforeunload = eventHandle; + } + }, + + teardown: function( namespaces, eventHandle ) { + if ( this.onbeforeunload === eventHandle ) { + this.onbeforeunload = null; + } + } + } + }, + + simulate: function( type, elem, event, bubble ) { + // Piggyback on a donor event to simulate a different one. + // Fake originalEvent to avoid donor's stopPropagation, but if the + // simulated event prevents default then we do the same on the donor. + var e = jQuery.extend( + new jQuery.Event(), + event, + { type: type, + isSimulated: true, + originalEvent: {} + } + ); + if ( bubble ) { + jQuery.event.trigger( e, null, elem ); + } else { + jQuery.event.dispatch.call( elem, e ); + } + if ( e.isDefaultPrevented() ) { + event.preventDefault(); + } + } +}; + +// Some plugins are using, but it's undocumented/deprecated and will be removed. +// The 1.7 special event interface should provide all the hooks needed now. +jQuery.event.handle = jQuery.event.dispatch; + +jQuery.removeEvent = document.removeEventListener ? + function( elem, type, handle ) { + if ( elem.removeEventListener ) { + elem.removeEventListener( type, handle, false ); + } + } : + function( elem, type, handle ) { + var name = "on" + type; + + if ( elem.detachEvent ) { + + // #8545, #7054, preventing memory leaks for custom events in IE6-8 + // detachEvent needed property on element, by name of that event, to properly expose it to GC + if ( typeof elem[ name ] === "undefined" ) { + elem[ name ] = null; + } + + elem.detachEvent( name, handle ); + } + }; + +jQuery.Event = function( src, props ) { + // Allow instantiation without the 'new' keyword + if ( !(this instanceof jQuery.Event) ) { + return new jQuery.Event( src, props ); + } + + // Event object + if ( src && src.type ) { + this.originalEvent = src; + this.type = src.type; + + // Events bubbling up the document may have been marked as prevented + // by a handler lower down the tree; reflect the correct value. + this.isDefaultPrevented = ( src.defaultPrevented || src.returnValue === false || + src.getPreventDefault && src.getPreventDefault() ) ? 
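+ // Illustrative sketch (not part of the upstream source): jQuery.Event can be
+ // created directly (with or without `new`) and handed to .trigger(); extra
+ // properties such as keyCode below travel with the synthetic event.
+ //
+ //     var e = jQuery.Event( "keydown", { keyCode: 27 } );
+ //     $( el ).trigger( e );
+ //     if ( e.isDefaultPrevented() ) { /* some handler vetoed it */ }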
returnTrue : returnFalse; + + // Event type + } else { + this.type = src; + } + + // Put explicitly provided properties onto the event object + if ( props ) { + jQuery.extend( this, props ); + } + + // Create a timestamp if incoming event doesn't have one + this.timeStamp = src && src.timeStamp || jQuery.now(); + + // Mark it as fixed + this[ jQuery.expando ] = true; +}; + +function returnFalse() { + return false; +} +function returnTrue() { + return true; +} + +// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding +// http://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html +jQuery.Event.prototype = { + preventDefault: function() { + this.isDefaultPrevented = returnTrue; + + var e = this.originalEvent; + if ( !e ) { + return; + } + + // if preventDefault exists run it on the original event + if ( e.preventDefault ) { + e.preventDefault(); + + // otherwise set the returnValue property of the original event to false (IE) + } else { + e.returnValue = false; + } + }, + stopPropagation: function() { + this.isPropagationStopped = returnTrue; + + var e = this.originalEvent; + if ( !e ) { + return; + } + // if stopPropagation exists run it on the original event + if ( e.stopPropagation ) { + e.stopPropagation(); + } + // otherwise set the cancelBubble property of the original event to true (IE) + e.cancelBubble = true; + }, + stopImmediatePropagation: function() { + this.isImmediatePropagationStopped = returnTrue; + this.stopPropagation(); + }, + isDefaultPrevented: returnFalse, + isPropagationStopped: returnFalse, + isImmediatePropagationStopped: returnFalse +}; + +// Create mouseenter/leave events using mouseover/out and event-time checks +jQuery.each({ + mouseenter: "mouseover", + mouseleave: "mouseout" +}, function( orig, fix ) { + jQuery.event.special[ orig ] = { + delegateType: fix, + bindType: fix, + + handle: function( event ) { + var ret, + target = this, + related = event.relatedTarget, + handleObj = event.handleObj, + selector = handleObj.selector; + + // For mousenter/leave call the handler if related is outside the target. + // NB: No relatedTarget if the mouse left/entered the browser window + if ( !related || (related !== target && !jQuery.contains( target, related )) ) { + event.type = handleObj.origType; + ret = handleObj.handler.apply( this, arguments ); + event.type = fix; + } + return ret; + } + }; +}); + +// IE submit delegation +if ( !jQuery.support.submitBubbles ) { + + jQuery.event.special.submit = { + setup: function() { + // Only need this for delegated form submit events + if ( jQuery.nodeName( this, "form" ) ) { + return false; + } + + // Lazy-add a submit handler when a descendant form may potentially be submitted + jQuery.event.add( this, "click._submit keypress._submit", function( e ) { + // Node name check avoids a VML-related crash in IE (#9807) + var elem = e.target, + form = jQuery.nodeName( elem, "input" ) || jQuery.nodeName( elem, "button" ) ? 
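+ // Sketch (illustrative only): the lazily attached click/keypress handler below
+ // is what makes delegated submit work in browsers where submit does not bubble,
+ // so the usual delegation pattern applies unchanged. "#content" is hypothetical.
+ //
+ //     $( "#content" ).on( "submit", "form", function( e ) {
+ //         e.preventDefault();   // handle the form without a full page reload
+ //     });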
elem.form : undefined; + if ( form && !jQuery._data( form, "_submit_attached" ) ) { + jQuery.event.add( form, "submit._submit", function( event ) { + event._submit_bubble = true; + }); + jQuery._data( form, "_submit_attached", true ); + } + }); + // return undefined since we don't need an event listener + }, + + postDispatch: function( event ) { + // If form was submitted by the user, bubble the event up the tree + if ( event._submit_bubble ) { + delete event._submit_bubble; + if ( this.parentNode && !event.isTrigger ) { + jQuery.event.simulate( "submit", this.parentNode, event, true ); + } + } + }, + + teardown: function() { + // Only need this for delegated form submit events + if ( jQuery.nodeName( this, "form" ) ) { + return false; + } + + // Remove delegated handlers; cleanData eventually reaps submit handlers attached above + jQuery.event.remove( this, "._submit" ); + } + }; +} + +// IE change delegation and checkbox/radio fix +if ( !jQuery.support.changeBubbles ) { + + jQuery.event.special.change = { + + setup: function() { + + if ( rformElems.test( this.nodeName ) ) { + // IE doesn't fire change on a check/radio until blur; trigger it on click + // after a propertychange. Eat the blur-change in special.change.handle. + // This still fires onchange a second time for check/radio after blur. + if ( this.type === "checkbox" || this.type === "radio" ) { + jQuery.event.add( this, "propertychange._change", function( event ) { + if ( event.originalEvent.propertyName === "checked" ) { + this._just_changed = true; + } + }); + jQuery.event.add( this, "click._change", function( event ) { + if ( this._just_changed && !event.isTrigger ) { + this._just_changed = false; + } + // Allow triggered, simulated change events (#11500) + jQuery.event.simulate( "change", this, event, true ); + }); + } + return false; + } + // Delegated event; lazy-add a change handler on descendant inputs + jQuery.event.add( this, "beforeactivate._change", function( e ) { + var elem = e.target; + + if ( rformElems.test( elem.nodeName ) && !jQuery._data( elem, "_change_attached" ) ) { + jQuery.event.add( elem, "change._change", function( event ) { + if ( this.parentNode && !event.isSimulated && !event.isTrigger ) { + jQuery.event.simulate( "change", this.parentNode, event, true ); + } + }); + jQuery._data( elem, "_change_attached", true ); + } + }); + }, + + handle: function( event ) { + var elem = event.target; + + // Swallow native change events from checkbox/radio, we already triggered them above + if ( this !== elem || event.isSimulated || event.isTrigger || (elem.type !== "radio" && elem.type !== "checkbox") ) { + return event.handleObj.handler.apply( this, arguments ); + } + }, + + teardown: function() { + jQuery.event.remove( this, "._change" ); + + return !rformElems.test( this.nodeName ); + } + }; +} + +// Create "bubbling" focus and blur events +if ( !jQuery.support.focusinBubbles ) { + jQuery.each({ focus: "focusin", blur: "focusout" }, function( orig, fix ) { + + // Attach a single capturing handler while someone wants focusin/focusout + var attaches = 0, + handler = function( event ) { + jQuery.event.simulate( fix, event.target, jQuery.event.fix( event ), true ); + }; + + jQuery.event.special[ fix ] = { + setup: function() { + if ( attaches++ === 0 ) { + document.addEventListener( orig, handler, true ); + } + }, + teardown: function() { + if ( --attaches === 0 ) { + document.removeEventListener( orig, handler, true ); + } + } + }; + }); +} + +jQuery.fn.extend({ + + on: function( types, selector, data, fn, 
/*INTERNAL*/ one ) { + var origFn, type; + + // Types can be a map of types/handlers + if ( typeof types === "object" ) { + // ( types-Object, selector, data ) + if ( typeof selector !== "string" ) { // && selector != null + // ( types-Object, data ) + data = data || selector; + selector = undefined; + } + for ( type in types ) { + this.on( type, selector, data, types[ type ], one ); + } + return this; + } + + if ( data == null && fn == null ) { + // ( types, fn ) + fn = selector; + data = selector = undefined; + } else if ( fn == null ) { + if ( typeof selector === "string" ) { + // ( types, selector, fn ) + fn = data; + data = undefined; + } else { + // ( types, data, fn ) + fn = data; + data = selector; + selector = undefined; + } + } + if ( fn === false ) { + fn = returnFalse; + } else if ( !fn ) { + return this; + } + + if ( one === 1 ) { + origFn = fn; + fn = function( event ) { + // Can use an empty set, since event contains the info + jQuery().off( event ); + return origFn.apply( this, arguments ); + }; + // Use same guid so caller can remove using origFn + fn.guid = origFn.guid || ( origFn.guid = jQuery.guid++ ); + } + return this.each( function() { + jQuery.event.add( this, types, fn, data, selector ); + }); + }, + one: function( types, selector, data, fn ) { + return this.on( types, selector, data, fn, 1 ); + }, + off: function( types, selector, fn ) { + var handleObj, type; + if ( types && types.preventDefault && types.handleObj ) { + // ( event ) dispatched jQuery.Event + handleObj = types.handleObj; + jQuery( types.delegateTarget ).off( + handleObj.namespace ? handleObj.origType + "." + handleObj.namespace : handleObj.origType, + handleObj.selector, + handleObj.handler + ); + return this; + } + if ( typeof types === "object" ) { + // ( types-object [, selector] ) + for ( type in types ) { + this.off( type, selector, types[ type ] ); + } + return this; + } + if ( selector === false || typeof selector === "function" ) { + // ( types [, fn] ) + fn = selector; + selector = undefined; + } + if ( fn === false ) { + fn = returnFalse; + } + return this.each(function() { + jQuery.event.remove( this, types, fn, selector ); + }); + }, + + bind: function( types, data, fn ) { + return this.on( types, null, data, fn ); + }, + unbind: function( types, fn ) { + return this.off( types, null, fn ); + }, + + live: function( types, data, fn ) { + jQuery( this.context ).on( types, this.selector, data, fn ); + return this; + }, + die: function( types, fn ) { + jQuery( this.context ).off( types, this.selector || "**", fn ); + return this; + }, + + delegate: function( selector, types, data, fn ) { + return this.on( types, selector, data, fn ); + }, + undelegate: function( selector, types, fn ) { + // ( namespace ) or ( selector, types [, fn] ) + return arguments.length === 1 ? 
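+ // Illustrative sketch (not upstream code): .on()/.off() above accept several
+ // argument shapes, and the older bind/delegate/live helpers simply forward to
+ // them. Selectors, data and handlers below are hypothetical.
+ //
+ //     $( "#list" ).on( "click", "li", { who: "list" }, handler );
+ //     $( "#list" ).on( { mouseenter: enterFn, mouseleave: leaveFn }, "li" );
+ //     $( "#list" ).off( "click", "li", handler );
+ //     $( "#list" ).delegate( "li", "click", handler );   // same as .on( "click", "li", handler )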
this.off( selector, "**" ) : this.off( types, selector || "**", fn ); + }, + + trigger: function( type, data ) { + return this.each(function() { + jQuery.event.trigger( type, data, this ); + }); + }, + triggerHandler: function( type, data ) { + if ( this[0] ) { + return jQuery.event.trigger( type, data, this[0], true ); + } + }, + + toggle: function( fn ) { + // Save reference to arguments for access in closure + var args = arguments, + guid = fn.guid || jQuery.guid++, + i = 0, + toggler = function( event ) { + // Figure out which function to execute + var lastToggle = ( jQuery._data( this, "lastToggle" + fn.guid ) || 0 ) % i; + jQuery._data( this, "lastToggle" + fn.guid, lastToggle + 1 ); + + // Make sure that clicks stop + event.preventDefault(); + + // and execute the function + return args[ lastToggle ].apply( this, arguments ) || false; + }; + + // link all the functions, so any of them can unbind this click handler + toggler.guid = guid; + while ( i < args.length ) { + args[ i++ ].guid = guid; + } + + return this.click( toggler ); + }, + + hover: function( fnOver, fnOut ) { + return this.mouseenter( fnOver ).mouseleave( fnOut || fnOver ); + } +}); + +jQuery.each( ("blur focus focusin focusout load resize scroll unload click dblclick " + + "mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave " + + "change select submit keydown keypress keyup error contextmenu").split(" "), function( i, name ) { + + // Handle event binding + jQuery.fn[ name ] = function( data, fn ) { + if ( fn == null ) { + fn = data; + data = null; + } + + return arguments.length > 0 ? + this.on( name, null, data, fn ) : + this.trigger( name ); + }; + + if ( rkeyEvent.test( name ) ) { + jQuery.event.fixHooks[ name ] = jQuery.event.keyHooks; + } + + if ( rmouseEvent.test( name ) ) { + jQuery.event.fixHooks[ name ] = jQuery.event.mouseHooks; + } +}); +/*! 
+ * Sizzle CSS Selector Engine + * Copyright 2012 jQuery Foundation and other contributors + * Released under the MIT license + * http://sizzlejs.com/ + */ +(function( window, undefined ) { + +var cachedruns, + assertGetIdNotName, + Expr, + getText, + isXML, + contains, + compile, + sortOrder, + hasDuplicate, + outermostContext, + + baseHasDuplicate = true, + strundefined = "undefined", + + expando = ( "sizcache" + Math.random() ).replace( ".", "" ), + + Token = String, + document = window.document, + docElem = document.documentElement, + dirruns = 0, + done = 0, + pop = [].pop, + push = [].push, + slice = [].slice, + // Use a stripped-down indexOf if a native one is unavailable + indexOf = [].indexOf || function( elem ) { + var i = 0, + len = this.length; + for ( ; i < len; i++ ) { + if ( this[i] === elem ) { + return i; + } + } + return -1; + }, + + // Augment a function for special use by Sizzle + markFunction = function( fn, value ) { + fn[ expando ] = value == null || value; + return fn; + }, + + createCache = function() { + var cache = {}, + keys = []; + + return markFunction(function( key, value ) { + // Only keep the most recent entries + if ( keys.push( key ) > Expr.cacheLength ) { + delete cache[ keys.shift() ]; + } + + // Retrieve with (key + " ") to avoid collision with native Object.prototype properties (see Issue #157) + return (cache[ key + " " ] = value); + }, cache ); + }, + + classCache = createCache(), + tokenCache = createCache(), + compilerCache = createCache(), + + // Regex + + // Whitespace characters http://www.w3.org/TR/css3-selectors/#whitespace + whitespace = "[\\x20\\t\\r\\n\\f]", + // http://www.w3.org/TR/css3-syntax/#characters + characterEncoding = "(?:\\\\.|[-\\w]|[^\\x00-\\xa0])+", + + // Loosely modeled on CSS identifier characters + // An unquoted value should be a CSS identifier (http://www.w3.org/TR/css3-selectors/#attribute-selectors) + // Proper syntax: http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier + identifier = characterEncoding.replace( "w", "w#" ), + + // Acceptable operators http://www.w3.org/TR/selectors/#attribute-selectors + operators = "([*^$|!~]?=)", + attributes = "\\[" + whitespace + "*(" + characterEncoding + ")" + whitespace + + "*(?:" + operators + whitespace + "*(?:(['\"])((?:\\\\.|[^\\\\])*?)\\3|(" + identifier + ")|)|)" + whitespace + "*\\]", + + // Prefer arguments not in parens/brackets, + // then attribute selectors and non-pseudos (denoted by :), + // then anything else + // These preferences are here to reduce the number of selectors + // needing tokenize in the PSEUDO preFilter + pseudos = ":(" + characterEncoding + ")(?:\\((?:(['\"])((?:\\\\.|[^\\\\])*?)\\2|([^()[\\]]*|(?:(?:" + attributes + ")|[^:]|\\\\.)*|.*))\\)|)", + + // For matchExpr.POS and matchExpr.needsContext + pos = ":(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace + + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", + + // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter + rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + whitespace + "+$", "g" ), + + rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), + rcombinators = new RegExp( "^" + whitespace + "*([\\x20\\t\\r\\n\\f>+~])" + whitespace + "*" ), + rpseudo = new RegExp( pseudos ), + + // Easily-parseable/retrievable ID or TAG or CLASS selectors + rquickExpr = /^(?:#([\w\-]+)|(\w+)|\.([\w\-]+))$/, + + rnot = /^:not/, + rsibling = /[\x20\t\r\n\f]*[+~]/, + rendsWithNot = /:not\($/, + + rheader = 
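+ // Sketch (illustrative, not upstream code): the `attributes` and `pseudos`
+ // source strings above are compiled into matchExpr just below; for instance the
+ // ATTR pattern recognizes a fragment like the hypothetical [data-state="open"].
+ //
+ //     matchExpr[ "ATTR" ].exec( '[data-state="open"]' )
+ //     // captures roughly: name "data-state", operator "=", value "open"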
/h\d/i, + rinputs = /input|select|textarea|button/i, + + rbackslash = /\\(?!\\)/g, + + matchExpr = { + "ID": new RegExp( "^#(" + characterEncoding + ")" ), + "CLASS": new RegExp( "^\\.(" + characterEncoding + ")" ), + "NAME": new RegExp( "^\\[name=['\"]?(" + characterEncoding + ")['\"]?\\]" ), + "TAG": new RegExp( "^(" + characterEncoding.replace( "w", "w*" ) + ")" ), + "ATTR": new RegExp( "^" + attributes ), + "PSEUDO": new RegExp( "^" + pseudos ), + "POS": new RegExp( pos, "i" ), + "CHILD": new RegExp( "^:(only|nth|first|last)-child(?:\\(" + whitespace + + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + whitespace + + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), + // For use in libraries implementing .is() + "needsContext": new RegExp( "^" + whitespace + "*[>+~]|" + pos, "i" ) + }, + + // Support + + // Used for testing something on an element + assert = function( fn ) { + var div = document.createElement("div"); + + try { + return fn( div ); + } catch (e) { + return false; + } finally { + // release memory in IE + div = null; + } + }, + + // Check if getElementsByTagName("*") returns only elements + assertTagNameNoComments = assert(function( div ) { + div.appendChild( document.createComment("") ); + return !div.getElementsByTagName("*").length; + }), + + // Check if getAttribute returns normalized href attributes + assertHrefNotNormalized = assert(function( div ) { + div.innerHTML = "<a href='#'></a>"; + return div.firstChild && typeof div.firstChild.getAttribute !== strundefined && + div.firstChild.getAttribute("href") === "#"; + }), + + // Check if attributes should be retrieved by attribute nodes + assertAttributes = assert(function( div ) { + div.innerHTML = "<select></select>"; + var type = typeof div.lastChild.getAttribute("multiple"); + // IE8 returns a string for some attributes even when not present + return type !== "boolean" && type !== "string"; + }), + + // Check if getElementsByClassName can be trusted + assertUsableClassName = assert(function( div ) { + // Opera can't find a second classname (in 9.6) + div.innerHTML = "<div class='hidden e'></div><div class='hidden'></div>"; + if ( !div.getElementsByClassName || !div.getElementsByClassName("e").length ) { + return false; + } + + // Safari 3.2 caches class attributes and doesn't catch changes + div.lastChild.className = "e"; + return div.getElementsByClassName("e").length === 2; + }), + + // Check if getElementById returns elements by name + // Check if getElementsByName privileges form controls or returns elements by ID + assertUsableName = assert(function( div ) { + // Inject content + div.id = expando + 0; + div.innerHTML = "<a name='" + expando + "'></a><div name='" + expando + "'></div>"; + docElem.insertBefore( div, docElem.firstChild ); + + // Test + var pass = document.getElementsByName && + // buggy browsers will return fewer than the correct 2 + document.getElementsByName( expando ).length === 2 + + // buggy browsers will return more than the correct 0 + document.getElementsByName( expando + 0 ).length; + assertGetIdNotName = !document.getElementById( expando ); + + // Cleanup + docElem.removeChild( div ); + + return pass; + }); + +// If slice is not available, provide a backup +try { + slice.call( docElem.childNodes, 0 )[0].nodeType; +} catch ( e ) { + slice = function( i ) { + var elem, + results = []; + for ( ; (elem = this[i]); i++ ) { + results.push( elem ); + } + return results; + }; +} + +function Sizzle( selector, context, results, seed ) { + results = results || []; + context = context || 
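+ // Illustrative sketch (not upstream code): Sizzle() below is the engine's entry
+ // point (jQuery wires it up as jQuery.find); it appends matches to the results
+ // array it returns. The selectors are hypothetical.
+ //
+ //     Sizzle( "ul.menu > li a", document );          // array of matching elements
+ //     Sizzle.matchesSelector( el, "li.active a" );   // true / false for one element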
document; + var match, elem, xml, m, + nodeType = context.nodeType; + + if ( !selector || typeof selector !== "string" ) { + return results; + } + + if ( nodeType !== 1 && nodeType !== 9 ) { + return []; + } + + xml = isXML( context ); + + if ( !xml && !seed ) { + if ( (match = rquickExpr.exec( selector )) ) { + // Speed-up: Sizzle("#ID") + if ( (m = match[1]) ) { + if ( nodeType === 9 ) { + elem = context.getElementById( m ); + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + if ( elem && elem.parentNode ) { + // Handle the case where IE, Opera, and Webkit return items + // by name instead of ID + if ( elem.id === m ) { + results.push( elem ); + return results; + } + } else { + return results; + } + } else { + // Context is not a document + if ( context.ownerDocument && (elem = context.ownerDocument.getElementById( m )) && + contains( context, elem ) && elem.id === m ) { + results.push( elem ); + return results; + } + } + + // Speed-up: Sizzle("TAG") + } else if ( match[2] ) { + push.apply( results, slice.call(context.getElementsByTagName( selector ), 0) ); + return results; + + // Speed-up: Sizzle(".CLASS") + } else if ( (m = match[3]) && assertUsableClassName && context.getElementsByClassName ) { + push.apply( results, slice.call(context.getElementsByClassName( m ), 0) ); + return results; + } + } + } + + // All others + return select( selector.replace( rtrim, "$1" ), context, results, seed, xml ); +} + +Sizzle.matches = function( expr, elements ) { + return Sizzle( expr, null, null, elements ); +}; + +Sizzle.matchesSelector = function( elem, expr ) { + return Sizzle( expr, null, null, [ elem ] ).length > 0; +}; + +// Returns a function to use in pseudos for input types +function createInputPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && elem.type === type; + }; +} + +// Returns a function to use in pseudos for buttons +function createButtonPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return (name === "input" || name === "button") && elem.type === type; + }; +} + +// Returns a function to use in pseudos for positionals +function createPositionalPseudo( fn ) { + return markFunction(function( argument ) { + argument = +argument; + return markFunction(function( seed, matches ) { + var j, + matchIndexes = fn( [], seed.length, argument ), + i = matchIndexes.length; + + // Match elements found at the specified indexes + while ( i-- ) { + if ( seed[ (j = matchIndexes[i]) ] ) { + seed[j] = !(matches[j] = seed[j]); + } + } + }); + }); +} + +/** + * Utility function for retrieving the text value of an array of DOM nodes + * @param {Array|Element} elem + */ +getText = Sizzle.getText = function( elem ) { + var node, + ret = "", + i = 0, + nodeType = elem.nodeType; + + if ( nodeType ) { + if ( nodeType === 1 || nodeType === 9 || nodeType === 11 ) { + // Use textContent for elements + // innerText usage removed for consistency of new lines (see #11153) + if ( typeof elem.textContent === "string" ) { + return elem.textContent; + } else { + // Traverse its children + for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { + ret += getText( elem ); + } + } + } else if ( nodeType === 3 || nodeType === 4 ) { + return elem.nodeValue; + } + // Do not include comment or processing instruction nodes + } else { + + // If no nodeType, this is expected to be an array + for ( ; (node = elem[i]); i++ ) { + // Do not traverse comment 
nodes + ret += getText( node ); + } + } + return ret; +}; + +isXML = Sizzle.isXML = function( elem ) { + // documentElement is verified for cases where it doesn't yet exist + // (such as loading iframes in IE - #4833) + var documentElement = elem && (elem.ownerDocument || elem).documentElement; + return documentElement ? documentElement.nodeName !== "HTML" : false; +}; + +// Element contains another +contains = Sizzle.contains = docElem.contains ? + function( a, b ) { + var adown = a.nodeType === 9 ? a.documentElement : a, + bup = b && b.parentNode; + return a === bup || !!( bup && bup.nodeType === 1 && adown.contains && adown.contains(bup) ); + } : + docElem.compareDocumentPosition ? + function( a, b ) { + return b && !!( a.compareDocumentPosition( b ) & 16 ); + } : + function( a, b ) { + while ( (b = b.parentNode) ) { + if ( b === a ) { + return true; + } + } + return false; + }; + +Sizzle.attr = function( elem, name ) { + var val, + xml = isXML( elem ); + + if ( !xml ) { + name = name.toLowerCase(); + } + if ( (val = Expr.attrHandle[ name ]) ) { + return val( elem ); + } + if ( xml || assertAttributes ) { + return elem.getAttribute( name ); + } + val = elem.getAttributeNode( name ); + return val ? + typeof elem[ name ] === "boolean" ? + elem[ name ] ? name : null : + val.specified ? val.value : null : + null; +}; + +Expr = Sizzle.selectors = { + + // Can be adjusted by the user + cacheLength: 50, + + createPseudo: markFunction, + + match: matchExpr, + + // IE6/7 return a modified href + attrHandle: assertHrefNotNormalized ? + {} : + { + "href": function( elem ) { + return elem.getAttribute( "href", 2 ); + }, + "type": function( elem ) { + return elem.getAttribute("type"); + } + }, + + find: { + "ID": assertGetIdNotName ? + function( id, context, xml ) { + if ( typeof context.getElementById !== strundefined && !xml ) { + var m = context.getElementById( id ); + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + return m && m.parentNode ? [m] : []; + } + } : + function( id, context, xml ) { + if ( typeof context.getElementById !== strundefined && !xml ) { + var m = context.getElementById( id ); + + return m ? + m.id === id || typeof m.getAttributeNode !== strundefined && m.getAttributeNode("id").value === id ? + [m] : + undefined : + []; + } + }, + + "TAG": assertTagNameNoComments ? 
+ function( tag, context ) { + if ( typeof context.getElementsByTagName !== strundefined ) { + return context.getElementsByTagName( tag ); + } + } : + function( tag, context ) { + var results = context.getElementsByTagName( tag ); + + // Filter out possible comments + if ( tag === "*" ) { + var elem, + tmp = [], + i = 0; + + for ( ; (elem = results[i]); i++ ) { + if ( elem.nodeType === 1 ) { + tmp.push( elem ); + } + } + + return tmp; + } + return results; + }, + + "NAME": assertUsableName && function( tag, context ) { + if ( typeof context.getElementsByName !== strundefined ) { + return context.getElementsByName( name ); + } + }, + + "CLASS": assertUsableClassName && function( className, context, xml ) { + if ( typeof context.getElementsByClassName !== strundefined && !xml ) { + return context.getElementsByClassName( className ); + } + } + }, + + relative: { + ">": { dir: "parentNode", first: true }, + " ": { dir: "parentNode" }, + "+": { dir: "previousSibling", first: true }, + "~": { dir: "previousSibling" } + }, + + preFilter: { + "ATTR": function( match ) { + match[1] = match[1].replace( rbackslash, "" ); + + // Move the given value to match[3] whether quoted or unquoted + match[3] = ( match[4] || match[5] || "" ).replace( rbackslash, "" ); + + if ( match[2] === "~=" ) { + match[3] = " " + match[3] + " "; + } + + return match.slice( 0, 4 ); + }, + + "CHILD": function( match ) { + /* matches from matchExpr["CHILD"] + 1 type (only|nth|...) + 2 argument (even|odd|\d*|\d*n([+-]\d+)?|...) + 3 xn-component of xn+y argument ([+-]?\d*n|) + 4 sign of xn-component + 5 x of xn-component + 6 sign of y-component + 7 y of y-component + */ + match[1] = match[1].toLowerCase(); + + if ( match[1] === "nth" ) { + // nth-child requires argument + if ( !match[2] ) { + Sizzle.error( match[0] ); + } + + // numeric x and y parameters for Expr.filter.CHILD + // remember that false/true cast respectively to 0/1 + match[3] = +( match[3] ? match[4] + (match[5] || 1) : 2 * ( match[2] === "even" || match[2] === "odd" ) ); + match[4] = +( ( match[6] + match[7] ) || match[2] === "odd" ); + + // other types prohibit arguments + } else if ( match[2] ) { + Sizzle.error( match[0] ); + } + + return match; + }, + + "PSEUDO": function( match ) { + var unquoted, excess; + if ( matchExpr["CHILD"].test( match[0] ) ) { + return null; + } + + if ( match[3] ) { + match[2] = match[3]; + } else if ( (unquoted = match[4]) ) { + // Only check arguments that contain a pseudo + if ( rpseudo.test(unquoted) && + // Get excess from tokenize (recursively) + (excess = tokenize( unquoted, true )) && + // advance to the next closing parenthesis + (excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length) ) { + + // excess is a negative index + unquoted = unquoted.slice( 0, excess ); + match[0] = match[0].slice( 0, excess ); + } + match[2] = unquoted; + } + + // Return only captures needed by the pseudo filter method (type and argument) + return match.slice( 0, 3 ); + } + }, + + filter: { + "ID": assertGetIdNotName ? 
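+ // Worked sketch (illustrative, not upstream code) of the CHILD preFilter above:
+ // for ":nth-child(2n+1)" it normalizes the argument to first = 2 (the cycle
+ // size) and last = 1 (the offset); the CHILD filter below then matches an
+ // element whose 1-based position p makes (p - last) zero or a positive
+ // multiple of first, i.e. positions 1, 3, 5, ...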
+ function( id ) { + id = id.replace( rbackslash, "" ); + return function( elem ) { + return elem.getAttribute("id") === id; + }; + } : + function( id ) { + id = id.replace( rbackslash, "" ); + return function( elem ) { + var node = typeof elem.getAttributeNode !== strundefined && elem.getAttributeNode("id"); + return node && node.value === id; + }; + }, + + "TAG": function( nodeName ) { + if ( nodeName === "*" ) { + return function() { return true; }; + } + nodeName = nodeName.replace( rbackslash, "" ).toLowerCase(); + + return function( elem ) { + return elem.nodeName && elem.nodeName.toLowerCase() === nodeName; + }; + }, + + "CLASS": function( className ) { + var pattern = classCache[ expando ][ className + " " ]; + + return pattern || + (pattern = new RegExp( "(^|" + whitespace + ")" + className + "(" + whitespace + "|$)" )) && + classCache( className, function( elem ) { + return pattern.test( elem.className || (typeof elem.getAttribute !== strundefined && elem.getAttribute("class")) || "" ); + }); + }, + + "ATTR": function( name, operator, check ) { + return function( elem, context ) { + var result = Sizzle.attr( elem, name ); + + if ( result == null ) { + return operator === "!="; + } + if ( !operator ) { + return true; + } + + result += ""; + + return operator === "=" ? result === check : + operator === "!=" ? result !== check : + operator === "^=" ? check && result.indexOf( check ) === 0 : + operator === "*=" ? check && result.indexOf( check ) > -1 : + operator === "$=" ? check && result.substr( result.length - check.length ) === check : + operator === "~=" ? ( " " + result + " " ).indexOf( check ) > -1 : + operator === "|=" ? result === check || result.substr( 0, check.length + 1 ) === check + "-" : + false; + }; + }, + + "CHILD": function( type, argument, first, last ) { + + if ( type === "nth" ) { + return function( elem ) { + var node, diff, + parent = elem.parentNode; + + if ( first === 1 && last === 0 ) { + return true; + } + + if ( parent ) { + diff = 0; + for ( node = parent.firstChild; node; node = node.nextSibling ) { + if ( node.nodeType === 1 ) { + diff++; + if ( elem === node ) { + break; + } + } + } + } + + // Incorporate the offset (or cast to NaN), then check against cycle size + diff -= last; + return diff === first || ( diff % first === 0 && diff / first >= 0 ); + }; + } + + return function( elem ) { + var node = elem; + + switch ( type ) { + case "only": + case "first": + while ( (node = node.previousSibling) ) { + if ( node.nodeType === 1 ) { + return false; + } + } + + if ( type === "first" ) { + return true; + } + + node = elem; + + /* falls through */ + case "last": + while ( (node = node.nextSibling) ) { + if ( node.nodeType === 1 ) { + return false; + } + } + + return true; + } + }; + }, + + "PSEUDO": function( pseudo, argument ) { + // pseudo-class names are case-insensitive + // http://www.w3.org/TR/selectors/#pseudo-classes + // Prioritize by case sensitivity in case custom pseudos are added with uppercase letters + // Remember that setFilters inherits from pseudos + var args, + fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] || + Sizzle.error( "unsupported pseudo: " + pseudo ); + + // The user may use createPseudo to indicate that + // arguments are needed to create the filter function + // just as Sizzle does + if ( fn[ expando ] ) { + return fn( argument ); + } + + // But maintain support for old signatures + if ( fn.length > 1 ) { + args = [ pseudo, pseudo, "", argument ]; + return Expr.setFilters.hasOwnProperty( 
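+ // Sketch (illustrative only) of the ATTR filter semantics below, using a
+ // hypothetical attribute value "ceph-qa":
+ //
+ //     [data-x="ceph-qa"]    exact match            [data-x^="ceph"]   prefix
+ //     [data-x$="qa"]        suffix                 [data-x*="ph-q"]   substring
+ //     [data-x~="ceph-qa"]   whitespace-separated word
+ //     [data-x|="ceph"]      equal to "ceph" or starting with "ceph-"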
pseudo.toLowerCase() ) ? + markFunction(function( seed, matches ) { + var idx, + matched = fn( seed, argument ), + i = matched.length; + while ( i-- ) { + idx = indexOf.call( seed, matched[i] ); + seed[ idx ] = !( matches[ idx ] = matched[i] ); + } + }) : + function( elem ) { + return fn( elem, 0, args ); + }; + } + + return fn; + } + }, + + pseudos: { + "not": markFunction(function( selector ) { + // Trim the selector passed to compile + // to avoid treating leading and trailing + // spaces as combinators + var input = [], + results = [], + matcher = compile( selector.replace( rtrim, "$1" ) ); + + return matcher[ expando ] ? + markFunction(function( seed, matches, context, xml ) { + var elem, + unmatched = matcher( seed, null, xml, [] ), + i = seed.length; + + // Match elements unmatched by `matcher` + while ( i-- ) { + if ( (elem = unmatched[i]) ) { + seed[i] = !(matches[i] = elem); + } + } + }) : + function( elem, context, xml ) { + input[0] = elem; + matcher( input, null, xml, results ); + return !results.pop(); + }; + }), + + "has": markFunction(function( selector ) { + return function( elem ) { + return Sizzle( selector, elem ).length > 0; + }; + }), + + "contains": markFunction(function( text ) { + return function( elem ) { + return ( elem.textContent || elem.innerText || getText( elem ) ).indexOf( text ) > -1; + }; + }), + + "enabled": function( elem ) { + return elem.disabled === false; + }, + + "disabled": function( elem ) { + return elem.disabled === true; + }, + + "checked": function( elem ) { + // In CSS3, :checked should return both checked and selected elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + var nodeName = elem.nodeName.toLowerCase(); + return (nodeName === "input" && !!elem.checked) || (nodeName === "option" && !!elem.selected); + }, + + "selected": function( elem ) { + // Accessing this property makes selected-by-default + // options in Safari work properly + if ( elem.parentNode ) { + elem.parentNode.selectedIndex; + } + + return elem.selected === true; + }, + + "parent": function( elem ) { + return !Expr.pseudos["empty"]( elem ); + }, + + "empty": function( elem ) { + // http://www.w3.org/TR/selectors/#empty-pseudo + // :empty is only affected by element nodes and content nodes(including text(3), cdata(4)), + // not comment, processing instructions, or others + // Thanks to Diego Perini for the nodeName shortcut + // Greater than "@" means alpha characters (specifically not starting with "#" or "?") + var nodeType; + elem = elem.firstChild; + while ( elem ) { + if ( elem.nodeName > "@" || (nodeType = elem.nodeType) === 3 || nodeType === 4 ) { + return false; + } + elem = elem.nextSibling; + } + return true; + }, + + "header": function( elem ) { + return rheader.test( elem.nodeName ); + }, + + "text": function( elem ) { + var type, attr; + // IE6 and 7 will map elem.type to 'text' for new HTML5 types (search, etc) + // use getAttribute instead to test this case + return elem.nodeName.toLowerCase() === "input" && + (type = elem.type) === "text" && + ( (attr = elem.getAttribute("type")) == null || attr.toLowerCase() === type ); + }, + + // Input types + "radio": createInputPseudo("radio"), + "checkbox": createInputPseudo("checkbox"), + "file": createInputPseudo("file"), + "password": createInputPseudo("password"), + "image": createInputPseudo("image"), + + "submit": createButtonPseudo("submit"), + "reset": createButtonPseudo("reset"), + + "button": function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" 
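+ // Illustrative sketch (not upstream code): besides the standard CSS pseudos,
+ // Sizzle adds the form and positional pseudos defined below. Selectors are
+ // hypothetical.
+ //
+ //     $( ":checked" )             // checked checkboxes/radios and selected <option>s
+ //     $( "li:eq(2)" )             // third list item (0-based index)
+ //     $( "tr:lt(3)" )             // first three rows
+ //     $( "div:contains(error)" )  // divs whose text contains "error"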
&& elem.type === "button" || name === "button"; + }, + + "input": function( elem ) { + return rinputs.test( elem.nodeName ); + }, + + "focus": function( elem ) { + var doc = elem.ownerDocument; + return elem === doc.activeElement && (!doc.hasFocus || doc.hasFocus()) && !!(elem.type || elem.href || ~elem.tabIndex); + }, + + "active": function( elem ) { + return elem === elem.ownerDocument.activeElement; + }, + + // Positional types + "first": createPositionalPseudo(function() { + return [ 0 ]; + }), + + "last": createPositionalPseudo(function( matchIndexes, length ) { + return [ length - 1 ]; + }), + + "eq": createPositionalPseudo(function( matchIndexes, length, argument ) { + return [ argument < 0 ? argument + length : argument ]; + }), + + "even": createPositionalPseudo(function( matchIndexes, length ) { + for ( var i = 0; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "odd": createPositionalPseudo(function( matchIndexes, length ) { + for ( var i = 1; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "lt": createPositionalPseudo(function( matchIndexes, length, argument ) { + for ( var i = argument < 0 ? argument + length : argument; --i >= 0; ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "gt": createPositionalPseudo(function( matchIndexes, length, argument ) { + for ( var i = argument < 0 ? argument + length : argument; ++i < length; ) { + matchIndexes.push( i ); + } + return matchIndexes; + }) + } +}; + +function siblingCheck( a, b, ret ) { + if ( a === b ) { + return ret; + } + + var cur = a.nextSibling; + + while ( cur ) { + if ( cur === b ) { + return -1; + } + + cur = cur.nextSibling; + } + + return 1; +} + +sortOrder = docElem.compareDocumentPosition ? + function( a, b ) { + if ( a === b ) { + hasDuplicate = true; + return 0; + } + + return ( !a.compareDocumentPosition || !b.compareDocumentPosition ? + a.compareDocumentPosition : + a.compareDocumentPosition(b) & 4 + ) ? -1 : 1; + } : + function( a, b ) { + // The nodes are identical, we can exit early + if ( a === b ) { + hasDuplicate = true; + return 0; + + // Fallback to using sourceIndex (in IE) if it's available on both nodes + } else if ( a.sourceIndex && b.sourceIndex ) { + return a.sourceIndex - b.sourceIndex; + } + + var al, bl, + ap = [], + bp = [], + aup = a.parentNode, + bup = b.parentNode, + cur = aup; + + // If the nodes are siblings (or identical) we can do a quick check + if ( aup === bup ) { + return siblingCheck( a, b ); + + // If no parents were found then the nodes are disconnected + } else if ( !aup ) { + return -1; + + } else if ( !bup ) { + return 1; + } + + // Otherwise they're somewhere else in the tree so we need + // to build up a full list of the parentNodes for comparison + while ( cur ) { + ap.unshift( cur ); + cur = cur.parentNode; + } + + cur = bup; + + while ( cur ) { + bp.unshift( cur ); + cur = cur.parentNode; + } + + al = ap.length; + bl = bp.length; + + // Start walking down the tree looking for a discrepancy + for ( var i = 0; i < al && i < bl; i++ ) { + if ( ap[i] !== bp[i] ) { + return siblingCheck( ap[i], bp[i] ); + } + } + + // We ended someplace up the tree so do a sibling check + return i === al ? + siblingCheck( a, bp[i], -1 ) : + siblingCheck( ap[i], b, 1 ); + }; + +// Always assume the presence of duplicates if sort doesn't +// pass them to our comparison function (as in Google Chrome). 
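+ // Sketch (illustrative, not upstream code): uniqueSort() below orders a list of
+ // elements into document order and strips duplicates in place; jQuery exposes
+ // it as jQuery.unique. The element array is hypothetical.
+ //
+ //     Sizzle.uniqueSort( [ elB, elA, elB ] );   // -> [ elA, elB ] (document order)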
+[0, 0].sort( sortOrder ); +baseHasDuplicate = !hasDuplicate; + +// Document sorting and removing duplicates +Sizzle.uniqueSort = function( results ) { + var elem, + duplicates = [], + i = 1, + j = 0; + + hasDuplicate = baseHasDuplicate; + results.sort( sortOrder ); + + if ( hasDuplicate ) { + for ( ; (elem = results[i]); i++ ) { + if ( elem === results[ i - 1 ] ) { + j = duplicates.push( i ); + } + } + while ( j-- ) { + results.splice( duplicates[ j ], 1 ); + } + } + + return results; +}; + +Sizzle.error = function( msg ) { + throw new Error( "Syntax error, unrecognized expression: " + msg ); +}; + +function tokenize( selector, parseOnly ) { + var matched, match, tokens, type, + soFar, groups, preFilters, + cached = tokenCache[ expando ][ selector + " " ]; + + if ( cached ) { + return parseOnly ? 0 : cached.slice( 0 ); + } + + soFar = selector; + groups = []; + preFilters = Expr.preFilter; + + while ( soFar ) { + + // Comma and first run + if ( !matched || (match = rcomma.exec( soFar )) ) { + if ( match ) { + // Don't consume trailing commas as valid + soFar = soFar.slice( match[0].length ) || soFar; + } + groups.push( tokens = [] ); + } + + matched = false; + + // Combinators + if ( (match = rcombinators.exec( soFar )) ) { + tokens.push( matched = new Token( match.shift() ) ); + soFar = soFar.slice( matched.length ); + + // Cast descendant combinators to space + matched.type = match[0].replace( rtrim, " " ); + } + + // Filters + for ( type in Expr.filter ) { + if ( (match = matchExpr[ type ].exec( soFar )) && (!preFilters[ type ] || + (match = preFilters[ type ]( match ))) ) { + + tokens.push( matched = new Token( match.shift() ) ); + soFar = soFar.slice( matched.length ); + matched.type = type; + matched.matches = match; + } + } + + if ( !matched ) { + break; + } + } + + // Return the length of the invalid excess + // if we're just parsing + // Otherwise, throw an error or return tokens + return parseOnly ? + soFar.length : + soFar ? + Sizzle.error( selector ) : + // Cache the tokens + tokenCache( selector, groups ).slice( 0 ); +} + +function addCombinator( matcher, combinator, base ) { + var dir = combinator.dir, + checkNonElements = base && combinator.dir === "parentNode", + doneName = done++; + + return combinator.first ? + // Check against closest ancestor/preceding element + function( elem, context, xml ) { + while ( (elem = elem[ dir ]) ) { + if ( checkNonElements || elem.nodeType === 1 ) { + return matcher( elem, context, xml ); + } + } + } : + + // Check against all ancestor/preceding elements + function( elem, context, xml ) { + // We can't set arbitrary data on XML nodes, so they don't benefit from dir caching + if ( !xml ) { + var cache, + dirkey = dirruns + " " + doneName + " ", + cachedkey = dirkey + cachedruns; + while ( (elem = elem[ dir ]) ) { + if ( checkNonElements || elem.nodeType === 1 ) { + if ( (cache = elem[ expando ]) === cachedkey ) { + return elem.sizset; + } else if ( typeof cache === "string" && cache.indexOf(dirkey) === 0 ) { + if ( elem.sizset ) { + return elem; + } + } else { + elem[ expando ] = cachedkey; + if ( matcher( elem, context, xml ) ) { + elem.sizset = true; + return elem; + } + elem.sizset = false; + } + } + } + } else { + while ( (elem = elem[ dir ]) ) { + if ( checkNonElements || elem.nodeType === 1 ) { + if ( matcher( elem, context, xml ) ) { + return elem; + } + } + } + } + }; +} + +function elementMatcher( matchers ) { + return matchers.length > 1 ? 
+ function( elem, context, xml ) { + var i = matchers.length; + while ( i-- ) { + if ( !matchers[i]( elem, context, xml ) ) { + return false; + } + } + return true; + } : + matchers[0]; +} + +function condense( unmatched, map, filter, context, xml ) { + var elem, + newUnmatched = [], + i = 0, + len = unmatched.length, + mapped = map != null; + + for ( ; i < len; i++ ) { + if ( (elem = unmatched[i]) ) { + if ( !filter || filter( elem, context, xml ) ) { + newUnmatched.push( elem ); + if ( mapped ) { + map.push( i ); + } + } + } + } + + return newUnmatched; +} + +function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postSelector ) { + if ( postFilter && !postFilter[ expando ] ) { + postFilter = setMatcher( postFilter ); + } + if ( postFinder && !postFinder[ expando ] ) { + postFinder = setMatcher( postFinder, postSelector ); + } + return markFunction(function( seed, results, context, xml ) { + var temp, i, elem, + preMap = [], + postMap = [], + preexisting = results.length, + + // Get initial elements from seed or context + elems = seed || multipleContexts( selector || "*", context.nodeType ? [ context ] : context, [] ), + + // Prefilter to get matcher input, preserving a map for seed-results synchronization + matcherIn = preFilter && ( seed || !selector ) ? + condense( elems, preMap, preFilter, context, xml ) : + elems, + + matcherOut = matcher ? + // If we have a postFinder, or filtered seed, or non-seed postFilter or preexisting results, + postFinder || ( seed ? preFilter : preexisting || postFilter ) ? + + // ...intermediate processing is necessary + [] : + + // ...otherwise use results directly + results : + matcherIn; + + // Find primary matches + if ( matcher ) { + matcher( matcherIn, matcherOut, context, xml ); + } + + // Apply postFilter + if ( postFilter ) { + temp = condense( matcherOut, postMap ); + postFilter( temp, [], context, xml ); + + // Un-match failing elements by moving them back to matcherIn + i = temp.length; + while ( i-- ) { + if ( (elem = temp[i]) ) { + matcherOut[ postMap[i] ] = !(matcherIn[ postMap[i] ] = elem); + } + } + } + + if ( seed ) { + if ( postFinder || preFilter ) { + if ( postFinder ) { + // Get the final matcherOut by condensing this intermediate into postFinder contexts + temp = []; + i = matcherOut.length; + while ( i-- ) { + if ( (elem = matcherOut[i]) ) { + // Restore matcherIn since elem is not yet a final match + temp.push( (matcherIn[i] = elem) ); + } + } + postFinder( null, (matcherOut = []), temp, xml ); + } + + // Move matched elements from seed to results to keep them synchronized + i = matcherOut.length; + while ( i-- ) { + if ( (elem = matcherOut[i]) && + (temp = postFinder ? indexOf.call( seed, elem ) : preMap[i]) > -1 ) { + + seed[temp] = !(results[temp] = elem); + } + } + } + + // Add elements to results, through postFinder if defined + } else { + matcherOut = condense( + matcherOut === results ? + matcherOut.splice( preexisting, matcherOut.length ) : + matcherOut + ); + if ( postFinder ) { + postFinder( null, results, matcherOut, xml ); + } else { + push.apply( results, matcherOut ); + } + } + }); +} + +function matcherFromTokens( tokens ) { + var checkContext, matcher, j, + len = tokens.length, + leadingRelative = Expr.relative[ tokens[0].type ], + implicitRelative = leadingRelative || Expr.relative[" "], + i = leadingRelative ? 
1 : 0, + + // The foundational matcher ensures that elements are reachable from top-level context(s) + matchContext = addCombinator( function( elem ) { + return elem === checkContext; + }, implicitRelative, true ), + matchAnyContext = addCombinator( function( elem ) { + return indexOf.call( checkContext, elem ) > -1; + }, implicitRelative, true ), + matchers = [ function( elem, context, xml ) { + return ( !leadingRelative && ( xml || context !== outermostContext ) ) || ( + (checkContext = context).nodeType ? + matchContext( elem, context, xml ) : + matchAnyContext( elem, context, xml ) ); + } ]; + + for ( ; i < len; i++ ) { + if ( (matcher = Expr.relative[ tokens[i].type ]) ) { + matchers = [ addCombinator( elementMatcher( matchers ), matcher ) ]; + } else { + matcher = Expr.filter[ tokens[i].type ].apply( null, tokens[i].matches ); + + // Return special upon seeing a positional matcher + if ( matcher[ expando ] ) { + // Find the next relative operator (if any) for proper handling + j = ++i; + for ( ; j < len; j++ ) { + if ( Expr.relative[ tokens[j].type ] ) { + break; + } + } + return setMatcher( + i > 1 && elementMatcher( matchers ), + i > 1 && tokens.slice( 0, i - 1 ).join("").replace( rtrim, "$1" ), + matcher, + i < j && matcherFromTokens( tokens.slice( i, j ) ), + j < len && matcherFromTokens( (tokens = tokens.slice( j )) ), + j < len && tokens.join("") + ); + } + matchers.push( matcher ); + } + } + + return elementMatcher( matchers ); +} + +function matcherFromGroupMatchers( elementMatchers, setMatchers ) { + var bySet = setMatchers.length > 0, + byElement = elementMatchers.length > 0, + superMatcher = function( seed, context, xml, results, expandContext ) { + var elem, j, matcher, + setMatched = [], + matchedCount = 0, + i = "0", + unmatched = seed && [], + outermost = expandContext != null, + contextBackup = outermostContext, + // We must always have either seed elements or context + elems = seed || byElement && Expr.find["TAG"]( "*", expandContext && context.parentNode || context ), + // Nested matchers should use non-integer dirruns + dirrunsUnique = (dirruns += contextBackup == null ? 
1 : Math.E); + + if ( outermost ) { + outermostContext = context !== document && context; + cachedruns = superMatcher.el; + } + + // Add elements passing elementMatchers directly to results + for ( ; (elem = elems[i]) != null; i++ ) { + if ( byElement && elem ) { + for ( j = 0; (matcher = elementMatchers[j]); j++ ) { + if ( matcher( elem, context, xml ) ) { + results.push( elem ); + break; + } + } + if ( outermost ) { + dirruns = dirrunsUnique; + cachedruns = ++superMatcher.el; + } + } + + // Track unmatched elements for set filters + if ( bySet ) { + // They will have gone through all possible matchers + if ( (elem = !matcher && elem) ) { + matchedCount--; + } + + // Lengthen the array for every element, matched or not + if ( seed ) { + unmatched.push( elem ); + } + } + } + + // Apply set filters to unmatched elements + matchedCount += i; + if ( bySet && i !== matchedCount ) { + for ( j = 0; (matcher = setMatchers[j]); j++ ) { + matcher( unmatched, setMatched, context, xml ); + } + + if ( seed ) { + // Reintegrate element matches to eliminate the need for sorting + if ( matchedCount > 0 ) { + while ( i-- ) { + if ( !(unmatched[i] || setMatched[i]) ) { + setMatched[i] = pop.call( results ); + } + } + } + + // Discard index placeholder values to get only actual matches + setMatched = condense( setMatched ); + } + + // Add matches to results + push.apply( results, setMatched ); + + // Seedless set matches succeeding multiple successful matchers stipulate sorting + if ( outermost && !seed && setMatched.length > 0 && + ( matchedCount + setMatchers.length ) > 1 ) { + + Sizzle.uniqueSort( results ); + } + } + + // Override manipulation of globals by nested matchers + if ( outermost ) { + dirruns = dirrunsUnique; + outermostContext = contextBackup; + } + + return unmatched; + }; + + superMatcher.el = 0; + return bySet ? 
+ markFunction( superMatcher ) : + superMatcher; +} + +compile = Sizzle.compile = function( selector, group /* Internal Use Only */ ) { + var i, + setMatchers = [], + elementMatchers = [], + cached = compilerCache[ expando ][ selector + " " ]; + + if ( !cached ) { + // Generate a function of recursive functions that can be used to check each element + if ( !group ) { + group = tokenize( selector ); + } + i = group.length; + while ( i-- ) { + cached = matcherFromTokens( group[i] ); + if ( cached[ expando ] ) { + setMatchers.push( cached ); + } else { + elementMatchers.push( cached ); + } + } + + // Cache the compiled function + cached = compilerCache( selector, matcherFromGroupMatchers( elementMatchers, setMatchers ) ); + } + return cached; +}; + +function multipleContexts( selector, contexts, results ) { + var i = 0, + len = contexts.length; + for ( ; i < len; i++ ) { + Sizzle( selector, contexts[i], results ); + } + return results; +} + +function select( selector, context, results, seed, xml ) { + var i, tokens, token, type, find, + match = tokenize( selector ), + j = match.length; + + if ( !seed ) { + // Try to minimize operations if there is only one group + if ( match.length === 1 ) { + + // Take a shortcut and set the context if the root selector is an ID + tokens = match[0] = match[0].slice( 0 ); + if ( tokens.length > 2 && (token = tokens[0]).type === "ID" && + context.nodeType === 9 && !xml && + Expr.relative[ tokens[1].type ] ) { + + context = Expr.find["ID"]( token.matches[0].replace( rbackslash, "" ), context, xml )[0]; + if ( !context ) { + return results; + } + + selector = selector.slice( tokens.shift().length ); + } + + // Fetch a seed set for right-to-left matching + for ( i = matchExpr["POS"].test( selector ) ? -1 : tokens.length - 1; i >= 0; i-- ) { + token = tokens[i]; + + // Abort if we hit a combinator + if ( Expr.relative[ (type = token.type) ] ) { + break; + } + if ( (find = Expr.find[ type ]) ) { + // Search, expanding context for leading sibling combinators + if ( (seed = find( + token.matches[0].replace( rbackslash, "" ), + rsibling.test( tokens[0].type ) && context.parentNode || context, + xml + )) ) { + + // If seed is empty or no tokens remain, we can return early + tokens.splice( i, 1 ); + selector = seed.length && tokens.join(""); + if ( !selector ) { + push.apply( results, slice.call( seed, 0 ) ); + return results; + } + + break; + } + } + } + } + } + + // Compile and execute a filtering function + // Provide `match` to avoid retokenization if we modified the selector above + compile( selector, match )( + seed, + context, + xml, + results, + rsibling.test( selector ) + ); + return results; +} + +if ( document.querySelectorAll ) { + (function() { + var disconnectedMatch, + oldSelect = select, + rescape = /'|\\/g, + rattributeQuotes = /\=[\x20\t\r\n\f]*([^'"\]]*)[\x20\t\r\n\f]*\]/g, + + // qSa(:focus) reports false when true (Chrome 21), no need to also add to buggyMatches since matches checks buggyQSA + // A support test would require too much code (would include document ready) + rbuggyQSA = [ ":focus" ], + + // matchesSelector(:active) reports false when true (IE9/Opera 11.5) + // A support test would require too much code (would include document ready) + // just skip matchesSelector for :active + rbuggyMatches = [ ":active" ], + matches = docElem.matchesSelector || + docElem.mozMatchesSelector || + docElem.webkitMatchesSelector || + docElem.oMatchesSelector || + docElem.msMatchesSelector; + + // Build QSA regex + // Regex strategy adopted from Diego Perini 
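+ // Each assert() below feeds querySelectorAll deliberately tricky markup and, when the
+ // engine gets a selector wrong, pushes a matching pattern into rbuggyQSA; the select()
+ // override further down routes any selector matching that regex to the non-QSA path.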
+ assert(function( div ) { + // Select is set to empty string on purpose + // This is to test IE's treatment of not explictly + // setting a boolean content attribute, + // since its presence should be enough + // http://bugs.jquery.com/ticket/12359 + div.innerHTML = "<select><option selected=''></option></select>"; + + // IE8 - Some boolean attributes are not treated correctly + if ( !div.querySelectorAll("[selected]").length ) { + rbuggyQSA.push( "\\[" + whitespace + "*(?:checked|disabled|ismap|multiple|readonly|selected|value)" ); + } + + // Webkit/Opera - :checked should return selected option elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + // IE8 throws error here (do not put tests after this one) + if ( !div.querySelectorAll(":checked").length ) { + rbuggyQSA.push(":checked"); + } + }); + + assert(function( div ) { + + // Opera 10-12/IE9 - ^= $= *= and empty values + // Should not select anything + div.innerHTML = "<p test=''></p>"; + if ( div.querySelectorAll("[test^='']").length ) { + rbuggyQSA.push( "[*^$]=" + whitespace + "*(?:\"\"|'')" ); + } + + // FF 3.5 - :enabled/:disabled and hidden elements (hidden elements are still enabled) + // IE8 throws error here (do not put tests after this one) + div.innerHTML = "<input type='hidden'/>"; + if ( !div.querySelectorAll(":enabled").length ) { + rbuggyQSA.push(":enabled", ":disabled"); + } + }); + + // rbuggyQSA always contains :focus, so no need for a length check + rbuggyQSA = /* rbuggyQSA.length && */ new RegExp( rbuggyQSA.join("|") ); + + select = function( selector, context, results, seed, xml ) { + // Only use querySelectorAll when not filtering, + // when this is not xml, + // and when no QSA bugs apply + if ( !seed && !xml && !rbuggyQSA.test( selector ) ) { + var groups, i, + old = true, + nid = expando, + newContext = context, + newSelector = context.nodeType === 9 && selector; + + // qSA works strangely on Element-rooted queries + // We can work around this by specifying an extra ID on the root + // and working up from there (Thanks to Andrew Dupont for the technique) + // IE 8 doesn't work on object elements + if ( context.nodeType === 1 && context.nodeName.toLowerCase() !== "object" ) { + groups = tokenize( selector ); + + if ( (old = context.getAttribute("id")) ) { + nid = old.replace( rescape, "\\$&" ); + } else { + context.setAttribute( "id", nid ); + } + nid = "[id='" + nid + "'] "; + + i = groups.length; + while ( i-- ) { + groups[i] = nid + groups[i].join(""); + } + newContext = rsibling.test( selector ) && context.parentNode || context; + newSelector = groups.join(","); + } + + if ( newSelector ) { + try { + push.apply( results, slice.call( newContext.querySelectorAll( + newSelector + ), 0 ) ); + return results; + } catch(qsaError) { + } finally { + if ( !old ) { + context.removeAttribute("id"); + } + } + } + } + + return oldSelect( selector, context, results, seed, xml ); + }; + + if ( matches ) { + assert(function( div ) { + // Check to see if it's possible to do matchesSelector + // on a disconnected node (IE 9) + disconnectedMatch = matches.call( div, "div" ); + + // This should fail with an exception + // Gecko does not error, returns false instead + try { + matches.call( div, "[test!='']:sizzle" ); + rbuggyMatches.push( "!=", pseudos ); + } catch ( e ) {} + }); + + // rbuggyMatches always contains :active and :focus, so no need for a length check + rbuggyMatches = /* rbuggyMatches.length && */ new RegExp( rbuggyMatches.join("|") ); + + Sizzle.matchesSelector = function( elem, expr 
) { + // Make sure that attribute selectors are quoted + expr = expr.replace( rattributeQuotes, "='$1']" ); + + // rbuggyMatches always contains :active, so no need for an existence check + if ( !isXML( elem ) && !rbuggyMatches.test( expr ) && !rbuggyQSA.test( expr ) ) { + try { + var ret = matches.call( elem, expr ); + + // IE 9's matchesSelector returns false on disconnected nodes + if ( ret || disconnectedMatch || + // As well, disconnected nodes are said to be in a document + // fragment in IE 9 + elem.document && elem.document.nodeType !== 11 ) { + return ret; + } + } catch(e) {} + } + + return Sizzle( expr, null, null, [ elem ] ).length > 0; + }; + } + })(); +} + +// Deprecated +Expr.pseudos["nth"] = Expr.pseudos["eq"]; + +// Back-compat +function setFilters() {} +Expr.filters = setFilters.prototype = Expr.pseudos; +Expr.setFilters = new setFilters(); + +// Override sizzle attribute retrieval +Sizzle.attr = jQuery.attr; +jQuery.find = Sizzle; +jQuery.expr = Sizzle.selectors; +jQuery.expr[":"] = jQuery.expr.pseudos; +jQuery.unique = Sizzle.uniqueSort; +jQuery.text = Sizzle.getText; +jQuery.isXMLDoc = Sizzle.isXML; +jQuery.contains = Sizzle.contains; + + +})( window ); +var runtil = /Until$/, + rparentsprev = /^(?:parents|prev(?:Until|All))/, + isSimple = /^.[^:#\[\.,]*$/, + rneedsContext = jQuery.expr.match.needsContext, + // methods guaranteed to produce a unique set when starting from a unique set + guaranteedUnique = { + children: true, + contents: true, + next: true, + prev: true + }; + +jQuery.fn.extend({ + find: function( selector ) { + var i, l, length, n, r, ret, + self = this; + + if ( typeof selector !== "string" ) { + return jQuery( selector ).filter(function() { + for ( i = 0, l = self.length; i < l; i++ ) { + if ( jQuery.contains( self[ i ], this ) ) { + return true; + } + } + }); + } + + ret = this.pushStack( "", "find", selector ); + + for ( i = 0, l = this.length; i < l; i++ ) { + length = ret.length; + jQuery.find( selector, this[i], ret ); + + if ( i > 0 ) { + // Make sure that the results are unique + for ( n = length; n < ret.length; n++ ) { + for ( r = 0; r < length; r++ ) { + if ( ret[r] === ret[n] ) { + ret.splice(n--, 1); + break; + } + } + } + } + } + + return ret; + }, + + has: function( target ) { + var i, + targets = jQuery( target, this ), + len = targets.length; + + return this.filter(function() { + for ( i = 0; i < len; i++ ) { + if ( jQuery.contains( this, targets[i] ) ) { + return true; + } + } + }); + }, + + not: function( selector ) { + return this.pushStack( winnow(this, selector, false), "not", selector); + }, + + filter: function( selector ) { + return this.pushStack( winnow(this, selector, true), "filter", selector ); + }, + + is: function( selector ) { + return !!selector && ( + typeof selector === "string" ? + // If this is a positional/relative selector, check membership in the returned set + // so $("p:first").is("p:last") won't return true for a doc with two "p". + rneedsContext.test( selector ) ? + jQuery( selector, this.context ).index( this[0] ) >= 0 : + jQuery.filter( selector, this ).length > 0 : + this.filter( selector ).length > 0 ); + }, + + closest: function( selectors, context ) { + var cur, + i = 0, + l = this.length, + ret = [], + pos = rneedsContext.test( selectors ) || typeof selectors !== "string" ? + jQuery( selectors, context || this.context ) : + 0; + + for ( ; i < l; i++ ) { + cur = this[i]; + + while ( cur && cur.ownerDocument && cur !== context && cur.nodeType !== 11 ) { + if ( pos ? 
pos.index(cur) > -1 : jQuery.find.matchesSelector(cur, selectors) ) { + ret.push( cur ); + break; + } + cur = cur.parentNode; + } + } + + ret = ret.length > 1 ? jQuery.unique( ret ) : ret; + + return this.pushStack( ret, "closest", selectors ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + + // No argument, return index in parent + if ( !elem ) { + return ( this[0] && this[0].parentNode ) ? this.prevAll().length : -1; + } + + // index in selector + if ( typeof elem === "string" ) { + return jQuery.inArray( this[0], jQuery( elem ) ); + } + + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem.jquery ? elem[0] : elem, this ); + }, + + add: function( selector, context ) { + var set = typeof selector === "string" ? + jQuery( selector, context ) : + jQuery.makeArray( selector && selector.nodeType ? [ selector ] : selector ), + all = jQuery.merge( this.get(), set ); + + return this.pushStack( isDisconnected( set[0] ) || isDisconnected( all[0] ) ? + all : + jQuery.unique( all ) ); + }, + + addBack: function( selector ) { + return this.add( selector == null ? + this.prevObject : this.prevObject.filter(selector) + ); + } +}); + +jQuery.fn.andSelf = jQuery.fn.addBack; + +// A painfully simple check to see if an element is disconnected +// from a document (should be improved, where feasible). +function isDisconnected( node ) { + return !node || !node.parentNode || node.parentNode.nodeType === 11; +} + +function sibling( cur, dir ) { + do { + cur = cur[ dir ]; + } while ( cur && cur.nodeType !== 1 ); + + return cur; +} + +jQuery.each({ + parent: function( elem ) { + var parent = elem.parentNode; + return parent && parent.nodeType !== 11 ? parent : null; + }, + parents: function( elem ) { + return jQuery.dir( elem, "parentNode" ); + }, + parentsUntil: function( elem, i, until ) { + return jQuery.dir( elem, "parentNode", until ); + }, + next: function( elem ) { + return sibling( elem, "nextSibling" ); + }, + prev: function( elem ) { + return sibling( elem, "previousSibling" ); + }, + nextAll: function( elem ) { + return jQuery.dir( elem, "nextSibling" ); + }, + prevAll: function( elem ) { + return jQuery.dir( elem, "previousSibling" ); + }, + nextUntil: function( elem, i, until ) { + return jQuery.dir( elem, "nextSibling", until ); + }, + prevUntil: function( elem, i, until ) { + return jQuery.dir( elem, "previousSibling", until ); + }, + siblings: function( elem ) { + return jQuery.sibling( ( elem.parentNode || {} ).firstChild, elem ); + }, + children: function( elem ) { + return jQuery.sibling( elem.firstChild ); + }, + contents: function( elem ) { + return jQuery.nodeName( elem, "iframe" ) ? + elem.contentDocument || elem.contentWindow.document : + jQuery.merge( [], elem.childNodes ); + } +}, function( name, fn ) { + jQuery.fn[ name ] = function( until, selector ) { + var ret = jQuery.map( this, fn, until ); + + if ( !runtil.test( name ) ) { + selector = until; + } + + if ( selector && typeof selector === "string" ) { + ret = jQuery.filter( selector, ret ); + } + + ret = this.length > 1 && !guaranteedUnique[ name ] ? 
jQuery.unique( ret ) : ret; + + if ( this.length > 1 && rparentsprev.test( name ) ) { + ret = ret.reverse(); + } + + return this.pushStack( ret, name, core_slice.call( arguments ).join(",") ); + }; +}); + +jQuery.extend({ + filter: function( expr, elems, not ) { + if ( not ) { + expr = ":not(" + expr + ")"; + } + + return elems.length === 1 ? + jQuery.find.matchesSelector(elems[0], expr) ? [ elems[0] ] : [] : + jQuery.find.matches(expr, elems); + }, + + dir: function( elem, dir, until ) { + var matched = [], + cur = elem[ dir ]; + + while ( cur && cur.nodeType !== 9 && (until === undefined || cur.nodeType !== 1 || !jQuery( cur ).is( until )) ) { + if ( cur.nodeType === 1 ) { + matched.push( cur ); + } + cur = cur[dir]; + } + return matched; + }, + + sibling: function( n, elem ) { + var r = []; + + for ( ; n; n = n.nextSibling ) { + if ( n.nodeType === 1 && n !== elem ) { + r.push( n ); + } + } + + return r; + } +}); + +// Implement the identical functionality for filter and not +function winnow( elements, qualifier, keep ) { + + // Can't pass null or undefined to indexOf in Firefox 4 + // Set to 0 to skip string check + qualifier = qualifier || 0; + + if ( jQuery.isFunction( qualifier ) ) { + return jQuery.grep(elements, function( elem, i ) { + var retVal = !!qualifier.call( elem, i, elem ); + return retVal === keep; + }); + + } else if ( qualifier.nodeType ) { + return jQuery.grep(elements, function( elem, i ) { + return ( elem === qualifier ) === keep; + }); + + } else if ( typeof qualifier === "string" ) { + var filtered = jQuery.grep(elements, function( elem ) { + return elem.nodeType === 1; + }); + + if ( isSimple.test( qualifier ) ) { + return jQuery.filter(qualifier, filtered, !keep); + } else { + qualifier = jQuery.filter( qualifier, filtered ); + } + } + + return jQuery.grep(elements, function( elem, i ) { + return ( jQuery.inArray( elem, qualifier ) >= 0 ) === keep; + }); +} +function createSafeFragment( document ) { + var list = nodeNames.split( "|" ), + safeFrag = document.createDocumentFragment(); + + if ( safeFrag.createElement ) { + while ( list.length ) { + safeFrag.createElement( + list.pop() + ); + } + } + return safeFrag; +} + +var nodeNames = "abbr|article|aside|audio|bdi|canvas|data|datalist|details|figcaption|figure|footer|" + + "header|hgroup|mark|meter|nav|output|progress|section|summary|time|video", + rinlinejQuery = / jQuery\d+="(?:null|\d+)"/g, + rleadingWhitespace = /^\s+/, + rxhtmlTag = /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/gi, + rtagName = /<([\w:]+)/, + rtbody = /<tbody/i, + rhtml = /<|&#?\w+;/, + rnoInnerhtml = /<(?:script|style|link)/i, + rnocache = /<(?:script|object|embed|option|style)/i, + rnoshimcache = new RegExp("<(?:" + nodeNames + ")[\\s/>]", "i"), + rcheckableType = /^(?:checkbox|radio)$/, + // checked="checked" or checked + rchecked = /checked\s*(?:[^=]|=\s*.checked.)/i, + rscriptType = /\/(java|ecma)script/i, + rcleanScript = /^\s*<!(?:\[CDATA\[|\-\-)|[\]\-]{2}>\s*$/g, + wrapMap = { + option: [ 1, "<select multiple='multiple'>", "</select>" ], + legend: [ 1, "<fieldset>", "</fieldset>" ], + thead: [ 1, "<table>", "</table>" ], + tr: [ 2, "<table><tbody>", "</tbody></table>" ], + td: [ 3, "<table><tbody><tr>", "</tr></tbody></table>" ], + col: [ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ], + area: [ 1, "<map>", "</map>" ], + _default: [ 0, "", "" ] + }, + safeFragment = createSafeFragment( document ), + fragmentDiv = safeFragment.appendChild( document.createElement("div") ); + +wrapMap.optgroup = 
wrapMap.option; +wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead; +wrapMap.th = wrapMap.td; + +// IE6-8 can't serialize link, script, style, or any html5 (NoScope) tags, +// unless wrapped in a div with non-breaking characters in front of it. +if ( !jQuery.support.htmlSerialize ) { + wrapMap._default = [ 1, "X<div>", "</div>" ]; +} + +jQuery.fn.extend({ + text: function( value ) { + return jQuery.access( this, function( value ) { + return value === undefined ? + jQuery.text( this ) : + this.empty().append( ( this[0] && this[0].ownerDocument || document ).createTextNode( value ) ); + }, null, value, arguments.length ); + }, + + wrapAll: function( html ) { + if ( jQuery.isFunction( html ) ) { + return this.each(function(i) { + jQuery(this).wrapAll( html.call(this, i) ); + }); + } + + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).eq(0).clone(true); + + if ( this[0].parentNode ) { + wrap.insertBefore( this[0] ); + } + + wrap.map(function() { + var elem = this; + + while ( elem.firstChild && elem.firstChild.nodeType === 1 ) { + elem = elem.firstChild; + } + + return elem; + }).append( this ); + } + + return this; + }, + + wrapInner: function( html ) { + if ( jQuery.isFunction( html ) ) { + return this.each(function(i) { + jQuery(this).wrapInner( html.call(this, i) ); + }); + } + + return this.each(function() { + var self = jQuery( this ), + contents = self.contents(); + + if ( contents.length ) { + contents.wrapAll( html ); + + } else { + self.append( html ); + } + }); + }, + + wrap: function( html ) { + var isFunction = jQuery.isFunction( html ); + + return this.each(function(i) { + jQuery( this ).wrapAll( isFunction ? html.call(this, i) : html ); + }); + }, + + unwrap: function() { + return this.parent().each(function() { + if ( !jQuery.nodeName( this, "body" ) ) { + jQuery( this ).replaceWith( this.childNodes ); + } + }).end(); + }, + + append: function() { + return this.domManip(arguments, true, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 ) { + this.appendChild( elem ); + } + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 ) { + this.insertBefore( elem, this.firstChild ); + } + }); + }, + + before: function() { + if ( !isDisconnected( this[0] ) ) { + return this.domManip(arguments, false, function( elem ) { + this.parentNode.insertBefore( elem, this ); + }); + } + + if ( arguments.length ) { + var set = jQuery.clean( arguments ); + return this.pushStack( jQuery.merge( set, this ), "before", this.selector ); + } + }, + + after: function() { + if ( !isDisconnected( this[0] ) ) { + return this.domManip(arguments, false, function( elem ) { + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + } + + if ( arguments.length ) { + var set = jQuery.clean( arguments ); + return this.pushStack( jQuery.merge( this, set ), "after", this.selector ); + } + }, + + // keepData is for internal use only--do not document + remove: function( selector, keepData ) { + var elem, + i = 0; + + for ( ; (elem = this[i]) != null; i++ ) { + if ( !selector || jQuery.filter( selector, [ elem ] ).length ) { + if ( !keepData && elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName("*") ); + jQuery.cleanData( [ elem ] ); + } + + if ( elem.parentNode ) { + elem.parentNode.removeChild( elem ); + } + } + } + + return this; + }, + + empty: function() { + var elem, + i = 0; + + for ( 
; (elem = this[i]) != null; i++ ) { + // Remove element nodes and prevent memory leaks + if ( elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName("*") ); + } + + // Remove any remaining nodes + while ( elem.firstChild ) { + elem.removeChild( elem.firstChild ); + } + } + + return this; + }, + + clone: function( dataAndEvents, deepDataAndEvents ) { + dataAndEvents = dataAndEvents == null ? false : dataAndEvents; + deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents; + + return this.map( function () { + return jQuery.clone( this, dataAndEvents, deepDataAndEvents ); + }); + }, + + html: function( value ) { + return jQuery.access( this, function( value ) { + var elem = this[0] || {}, + i = 0, + l = this.length; + + if ( value === undefined ) { + return elem.nodeType === 1 ? + elem.innerHTML.replace( rinlinejQuery, "" ) : + undefined; + } + + // See if we can take a shortcut and just use innerHTML + if ( typeof value === "string" && !rnoInnerhtml.test( value ) && + ( jQuery.support.htmlSerialize || !rnoshimcache.test( value ) ) && + ( jQuery.support.leadingWhitespace || !rleadingWhitespace.test( value ) ) && + !wrapMap[ ( rtagName.exec( value ) || ["", ""] )[1].toLowerCase() ] ) { + + value = value.replace( rxhtmlTag, "<$1></$2>" ); + + try { + for (; i < l; i++ ) { + // Remove element nodes and prevent memory leaks + elem = this[i] || {}; + if ( elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName( "*" ) ); + elem.innerHTML = value; + } + } + + elem = 0; + + // If using innerHTML throws an exception, use the fallback method + } catch(e) {} + } + + if ( elem ) { + this.empty().append( value ); + } + }, null, value, arguments.length ); + }, + + replaceWith: function( value ) { + if ( !isDisconnected( this[0] ) ) { + // Make sure that the elements are removed from the DOM before they are inserted + // this can help fix replacing a parent with child elements + if ( jQuery.isFunction( value ) ) { + return this.each(function(i) { + var self = jQuery(this), old = self.html(); + self.replaceWith( value.call( this, i, old ) ); + }); + } + + if ( typeof value !== "string" ) { + value = jQuery( value ).detach(); + } + + return this.each(function() { + var next = this.nextSibling, + parent = this.parentNode; + + jQuery( this ).remove(); + + if ( next ) { + jQuery(next).before( value ); + } else { + jQuery(parent).append( value ); + } + }); + } + + return this.length ? + this.pushStack( jQuery(jQuery.isFunction(value) ? value() : value), "replaceWith", value ) : + this; + }, + + detach: function( selector ) { + return this.remove( selector, true ); + }, + + domManip: function( args, table, callback ) { + + // Flatten any nested arrays + args = [].concat.apply( [], args ); + + var results, first, fragment, iNoClone, + i = 0, + value = args[0], + scripts = [], + l = this.length; + + // We can't cloneNode fragments that contain checked, in WebKit + if ( !jQuery.support.checkClone && l > 1 && typeof value === "string" && rchecked.test( value ) ) { + return this.each(function() { + jQuery(this).domManip( args, table, callback ); + }); + } + + if ( jQuery.isFunction(value) ) { + return this.each(function(i) { + var self = jQuery(this); + args[0] = value.call( this, i, table ? 
self.html() : undefined ); + self.domManip( args, table, callback ); + }); + } + + if ( this[0] ) { + results = jQuery.buildFragment( args, this, scripts ); + fragment = results.fragment; + first = fragment.firstChild; + + if ( fragment.childNodes.length === 1 ) { + fragment = first; + } + + if ( first ) { + table = table && jQuery.nodeName( first, "tr" ); + + // Use the original fragment for the last item instead of the first because it can end up + // being emptied incorrectly in certain situations (#8070). + // Fragments from the fragment cache must always be cloned and never used in place. + for ( iNoClone = results.cacheable || l - 1; i < l; i++ ) { + callback.call( + table && jQuery.nodeName( this[i], "table" ) ? + findOrAppend( this[i], "tbody" ) : + this[i], + i === iNoClone ? + fragment : + jQuery.clone( fragment, true, true ) + ); + } + } + + // Fix #11809: Avoid leaking memory + fragment = first = null; + + if ( scripts.length ) { + jQuery.each( scripts, function( i, elem ) { + if ( elem.src ) { + if ( jQuery.ajax ) { + jQuery.ajax({ + url: elem.src, + type: "GET", + dataType: "script", + async: false, + global: false, + "throws": true + }); + } else { + jQuery.error("no ajax"); + } + } else { + jQuery.globalEval( ( elem.text || elem.textContent || elem.innerHTML || "" ).replace( rcleanScript, "" ) ); + } + + if ( elem.parentNode ) { + elem.parentNode.removeChild( elem ); + } + }); + } + } + + return this; + } +}); + +function findOrAppend( elem, tag ) { + return elem.getElementsByTagName( tag )[0] || elem.appendChild( elem.ownerDocument.createElement( tag ) ); +} + +function cloneCopyEvent( src, dest ) { + + if ( dest.nodeType !== 1 || !jQuery.hasData( src ) ) { + return; + } + + var type, i, l, + oldData = jQuery._data( src ), + curData = jQuery._data( dest, oldData ), + events = oldData.events; + + if ( events ) { + delete curData.handle; + curData.events = {}; + + for ( type in events ) { + for ( i = 0, l = events[ type ].length; i < l; i++ ) { + jQuery.event.add( dest, type, events[ type ][ i ] ); + } + } + } + + // make the cloned public data object a copy from the original + if ( curData.data ) { + curData.data = jQuery.extend( {}, curData.data ); + } +} + +function cloneFixAttributes( src, dest ) { + var nodeName; + + // We do not need to do anything for non-Elements + if ( dest.nodeType !== 1 ) { + return; + } + + // clearAttributes removes the attributes, which we don't want, + // but also removes the attachEvent events, which we *do* want + if ( dest.clearAttributes ) { + dest.clearAttributes(); + } + + // mergeAttributes, in contrast, only merges back on the + // original attributes, not the events + if ( dest.mergeAttributes ) { + dest.mergeAttributes( src ); + } + + nodeName = dest.nodeName.toLowerCase(); + + if ( nodeName === "object" ) { + // IE6-10 improperly clones children of object elements using classid. + // IE10 throws NoModificationAllowedError if parent is null, #12132. + if ( dest.parentNode ) { + dest.outerHTML = src.outerHTML; + } + + // This path appears unavoidable for IE9. When cloning an object + // element in IE9, the outerHTML strategy above is not sufficient. + // If the src has innerHTML and the destination does not, + // copy the src.innerHTML into the dest.innerHTML. 
#10324 + if ( jQuery.support.html5Clone && (src.innerHTML && !jQuery.trim(dest.innerHTML)) ) { + dest.innerHTML = src.innerHTML; + } + + } else if ( nodeName === "input" && rcheckableType.test( src.type ) ) { + // IE6-8 fails to persist the checked state of a cloned checkbox + // or radio button. Worse, IE6-7 fail to give the cloned element + // a checked appearance if the defaultChecked value isn't also set + + dest.defaultChecked = dest.checked = src.checked; + + // IE6-7 get confused and end up setting the value of a cloned + // checkbox/radio button to an empty string instead of "on" + if ( dest.value !== src.value ) { + dest.value = src.value; + } + + // IE6-8 fails to return the selected option to the default selected + // state when cloning options + } else if ( nodeName === "option" ) { + dest.selected = src.defaultSelected; + + // IE6-8 fails to set the defaultValue to the correct value when + // cloning other types of input fields + } else if ( nodeName === "input" || nodeName === "textarea" ) { + dest.defaultValue = src.defaultValue; + + // IE blanks contents when cloning scripts + } else if ( nodeName === "script" && dest.text !== src.text ) { + dest.text = src.text; + } + + // Event data gets referenced instead of copied if the expando + // gets copied too + dest.removeAttribute( jQuery.expando ); +} + +jQuery.buildFragment = function( args, context, scripts ) { + var fragment, cacheable, cachehit, + first = args[ 0 ]; + + // Set context from what may come in as undefined or a jQuery collection or a node + // Updated to fix #12266 where accessing context[0] could throw an exception in IE9/10 & + // also doubles as fix for #8950 where plain objects caused createDocumentFragment exception + context = context || document; + context = !context.nodeType && context[0] || context; + context = context.ownerDocument || context; + + // Only cache "small" (1/2 KB) HTML strings that are associated with the main document + // Cloning options loses the selected state, so don't cache them + // IE 6 doesn't like it when you put <object> or <embed> elements in a fragment + // Also, WebKit does not clone 'checked' attributes on cloneNode, so don't cache + // Lastly, IE6,7,8 will not correctly reuse cached fragments that were created from unknown elems #10501 + if ( args.length === 1 && typeof first === "string" && first.length < 512 && context === document && + first.charAt(0) === "<" && !rnocache.test( first ) && + (jQuery.support.checkClone || !rchecked.test( first )) && + (jQuery.support.html5Clone || !rnoshimcache.test( first )) ) { + + // Mark cacheable and look for a hit + cacheable = true; + fragment = jQuery.fragments[ first ]; + cachehit = fragment !== undefined; + } + + if ( !fragment ) { + fragment = context.createDocumentFragment(); + jQuery.clean( args, context, fragment, scripts ); + + // Update the cache, but only store false + // unless this is a second parsing of the same content + if ( cacheable ) { + jQuery.fragments[ first ] = cachehit && fragment; + } + } + + return { fragment: fragment, cacheable: cacheable }; +}; + +jQuery.fragments = {}; + +jQuery.each({ + appendTo: "append", + prependTo: "prepend", + insertBefore: "before", + insertAfter: "after", + replaceAll: "replaceWith" +}, function( name, original ) { + jQuery.fn[ name ] = function( selector ) { + var elems, + i = 0, + ret = [], + insert = jQuery( selector ), + l = insert.length, + parent = this.length === 1 && this[0].parentNode; + + if ( (parent == null || parent && parent.nodeType === 11 && 
parent.childNodes.length === 1) && l === 1 ) { + insert[ original ]( this[0] ); + return this; + } else { + for ( ; i < l; i++ ) { + elems = ( i > 0 ? this.clone(true) : this ).get(); + jQuery( insert[i] )[ original ]( elems ); + ret = ret.concat( elems ); + } + + return this.pushStack( ret, name, insert.selector ); + } + }; +}); + +function getAll( elem ) { + if ( typeof elem.getElementsByTagName !== "undefined" ) { + return elem.getElementsByTagName( "*" ); + + } else if ( typeof elem.querySelectorAll !== "undefined" ) { + return elem.querySelectorAll( "*" ); + + } else { + return []; + } +} + +// Used in clean, fixes the defaultChecked property +function fixDefaultChecked( elem ) { + if ( rcheckableType.test( elem.type ) ) { + elem.defaultChecked = elem.checked; + } +} + +jQuery.extend({ + clone: function( elem, dataAndEvents, deepDataAndEvents ) { + var srcElements, + destElements, + i, + clone; + + if ( jQuery.support.html5Clone || jQuery.isXMLDoc(elem) || !rnoshimcache.test( "<" + elem.nodeName + ">" ) ) { + clone = elem.cloneNode( true ); + + // IE<=8 does not properly clone detached, unknown element nodes + } else { + fragmentDiv.innerHTML = elem.outerHTML; + fragmentDiv.removeChild( clone = fragmentDiv.firstChild ); + } + + if ( (!jQuery.support.noCloneEvent || !jQuery.support.noCloneChecked) && + (elem.nodeType === 1 || elem.nodeType === 11) && !jQuery.isXMLDoc(elem) ) { + // IE copies events bound via attachEvent when using cloneNode. + // Calling detachEvent on the clone will also remove the events + // from the original. In order to get around this, we use some + // proprietary methods to clear the events. Thanks to MooTools + // guys for this hotness. + + cloneFixAttributes( elem, clone ); + + // Using Sizzle here is crazy slow, so we use getElementsByTagName instead + srcElements = getAll( elem ); + destElements = getAll( clone ); + + // Weird iteration because IE will replace the length property + // with an element if you are cloning the body and one of the + // elements on the page has a name or id of "length" + for ( i = 0; srcElements[i]; ++i ) { + // Ensure that the destination node is not null; Fixes #9587 + if ( destElements[i] ) { + cloneFixAttributes( srcElements[i], destElements[i] ); + } + } + } + + // Copy the events from the original to the clone + if ( dataAndEvents ) { + cloneCopyEvent( elem, clone ); + + if ( deepDataAndEvents ) { + srcElements = getAll( elem ); + destElements = getAll( clone ); + + for ( i = 0; srcElements[i]; ++i ) { + cloneCopyEvent( srcElements[i], destElements[i] ); + } + } + } + + srcElements = destElements = null; + + // Return the cloned set + return clone; + }, + + clean: function( elems, context, fragment, scripts ) { + var i, j, elem, tag, wrap, depth, div, hasBody, tbody, len, handleScript, jsTags, + safe = context === document && safeFragment, + ret = []; + + // Ensure that context is a document + if ( !context || typeof context.createDocumentFragment === "undefined" ) { + context = document; + } + + // Use the already-created safe fragment if context permits + for ( i = 0; (elem = elems[i]) != null; i++ ) { + if ( typeof elem === "number" ) { + elem += ""; + } + + if ( !elem ) { + continue; + } + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + if ( !rhtml.test( elem ) ) { + elem = context.createTextNode( elem ); + } else { + // Ensure a safe container in which to render the html + safe = safe || createSafeFragment( context ); + div = context.createElement("div"); + safe.appendChild( div ); + + // 
Fix "XHTML"-style tags in all browsers + elem = elem.replace(rxhtmlTag, "<$1></$2>"); + + // Go to html and back, then peel off extra wrappers + tag = ( rtagName.exec( elem ) || ["", ""] )[1].toLowerCase(); + wrap = wrapMap[ tag ] || wrapMap._default; + depth = wrap[0]; + div.innerHTML = wrap[1] + elem + wrap[2]; + + // Move to the right depth + while ( depth-- ) { + div = div.lastChild; + } + + // Remove IE's autoinserted <tbody> from table fragments + if ( !jQuery.support.tbody ) { + + // String was a <table>, *may* have spurious <tbody> + hasBody = rtbody.test(elem); + tbody = tag === "table" && !hasBody ? + div.firstChild && div.firstChild.childNodes : + + // String was a bare <thead> or <tfoot> + wrap[1] === "<table>" && !hasBody ? + div.childNodes : + []; + + for ( j = tbody.length - 1; j >= 0 ; --j ) { + if ( jQuery.nodeName( tbody[ j ], "tbody" ) && !tbody[ j ].childNodes.length ) { + tbody[ j ].parentNode.removeChild( tbody[ j ] ); + } + } + } + + // IE completely kills leading whitespace when innerHTML is used + if ( !jQuery.support.leadingWhitespace && rleadingWhitespace.test( elem ) ) { + div.insertBefore( context.createTextNode( rleadingWhitespace.exec(elem)[0] ), div.firstChild ); + } + + elem = div.childNodes; + + // Take out of fragment container (we need a fresh div each time) + div.parentNode.removeChild( div ); + } + } + + if ( elem.nodeType ) { + ret.push( elem ); + } else { + jQuery.merge( ret, elem ); + } + } + + // Fix #11356: Clear elements from safeFragment + if ( div ) { + elem = div = safe = null; + } + + // Reset defaultChecked for any radios and checkboxes + // about to be appended to the DOM in IE 6/7 (#8060) + if ( !jQuery.support.appendChecked ) { + for ( i = 0; (elem = ret[i]) != null; i++ ) { + if ( jQuery.nodeName( elem, "input" ) ) { + fixDefaultChecked( elem ); + } else if ( typeof elem.getElementsByTagName !== "undefined" ) { + jQuery.grep( elem.getElementsByTagName("input"), fixDefaultChecked ); + } + } + } + + // Append elements to a provided document fragment + if ( fragment ) { + // Special handling of each script element + handleScript = function( elem ) { + // Check if we consider it executable + if ( !elem.type || rscriptType.test( elem.type ) ) { + // Detach the script and store it in the scripts array (if provided) or the fragment + // Return truthy to indicate that it has been handled + return scripts ? + scripts.push( elem.parentNode ? 
elem.parentNode.removeChild( elem ) : elem ) : + fragment.appendChild( elem ); + } + }; + + for ( i = 0; (elem = ret[i]) != null; i++ ) { + // Check if we're done after handling an executable script + if ( !( jQuery.nodeName( elem, "script" ) && handleScript( elem ) ) ) { + // Append to fragment and handle embedded scripts + fragment.appendChild( elem ); + if ( typeof elem.getElementsByTagName !== "undefined" ) { + // handleScript alters the DOM, so use jQuery.merge to ensure snapshot iteration + jsTags = jQuery.grep( jQuery.merge( [], elem.getElementsByTagName("script") ), handleScript ); + + // Splice the scripts into ret after their former ancestor and advance our index beyond them + ret.splice.apply( ret, [i + 1, 0].concat( jsTags ) ); + i += jsTags.length; + } + } + } + } + + return ret; + }, + + cleanData: function( elems, /* internal */ acceptData ) { + var data, id, elem, type, + i = 0, + internalKey = jQuery.expando, + cache = jQuery.cache, + deleteExpando = jQuery.support.deleteExpando, + special = jQuery.event.special; + + for ( ; (elem = elems[i]) != null; i++ ) { + + if ( acceptData || jQuery.acceptData( elem ) ) { + + id = elem[ internalKey ]; + data = id && cache[ id ]; + + if ( data ) { + if ( data.events ) { + for ( type in data.events ) { + if ( special[ type ] ) { + jQuery.event.remove( elem, type ); + + // This is a shortcut to avoid jQuery.event.remove's overhead + } else { + jQuery.removeEvent( elem, type, data.handle ); + } + } + } + + // Remove cache only if it was not already removed by jQuery.event.remove + if ( cache[ id ] ) { + + delete cache[ id ]; + + // IE does not allow us to delete expando properties from nodes, + // nor does it have a removeAttribute function on Document nodes; + // we must handle all of these cases + if ( deleteExpando ) { + delete elem[ internalKey ]; + + } else if ( elem.removeAttribute ) { + elem.removeAttribute( internalKey ); + + } else { + elem[ internalKey ] = null; + } + + jQuery.deletedIds.push( id ); + } + } + } + } + } +}); +// Limit scope pollution from any deprecated API +(function() { + +var matched, browser; + +// Use of jQuery.browser is frowned upon. +// More details: http://api.jquery.com/jQuery.browser +// jQuery.uaMatch maintained for back-compat +jQuery.uaMatch = function( ua ) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec( ua ) || + /(webkit)[ \/]([\w.]+)/.exec( ua ) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec( ua ) || + /(msie) ([\w.]+)/.exec( ua ) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec( ua ) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; +}; + +matched = jQuery.uaMatch( navigator.userAgent ); +browser = {}; + +if ( matched.browser ) { + browser[ matched.browser ] = true; + browser.version = matched.version; +} + +// Chrome is Webkit, but Webkit is also Safari. 
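+// For example, a user agent string containing "Chrome/21.0" yields
+// { browser: "chrome", version: "21.0" }, so the chrome flag below also implies webkit,
+// while a non-Chrome WebKit UA is additionally flagged as safari.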
+if ( browser.chrome ) { + browser.webkit = true; +} else if ( browser.webkit ) { + browser.safari = true; +} + +jQuery.browser = browser; + +jQuery.sub = function() { + function jQuerySub( selector, context ) { + return new jQuerySub.fn.init( selector, context ); + } + jQuery.extend( true, jQuerySub, this ); + jQuerySub.superclass = this; + jQuerySub.fn = jQuerySub.prototype = this(); + jQuerySub.fn.constructor = jQuerySub; + jQuerySub.sub = this.sub; + jQuerySub.fn.init = function init( selector, context ) { + if ( context && context instanceof jQuery && !(context instanceof jQuerySub) ) { + context = jQuerySub( context ); + } + + return jQuery.fn.init.call( this, selector, context, rootjQuerySub ); + }; + jQuerySub.fn.init.prototype = jQuerySub.fn; + var rootjQuerySub = jQuerySub(document); + return jQuerySub; +}; + +})(); +var curCSS, iframe, iframeDoc, + ralpha = /alpha\([^)]*\)/i, + ropacity = /opacity=([^)]*)/, + rposition = /^(top|right|bottom|left)$/, + // swappable if display is none or starts with table except "table", "table-cell", or "table-caption" + // see here for display values: https://developer.mozilla.org/en-US/docs/CSS/display + rdisplayswap = /^(none|table(?!-c[ea]).+)/, + rmargin = /^margin/, + rnumsplit = new RegExp( "^(" + core_pnum + ")(.*)$", "i" ), + rnumnonpx = new RegExp( "^(" + core_pnum + ")(?!px)[a-z%]+$", "i" ), + rrelNum = new RegExp( "^([-+])=(" + core_pnum + ")", "i" ), + elemdisplay = { BODY: "block" }, + + cssShow = { position: "absolute", visibility: "hidden", display: "block" }, + cssNormalTransform = { + letterSpacing: 0, + fontWeight: 400 + }, + + cssExpand = [ "Top", "Right", "Bottom", "Left" ], + cssPrefixes = [ "Webkit", "O", "Moz", "ms" ], + + eventsToggle = jQuery.fn.toggle; + +// return a css property mapped to a potentially vendor prefixed property +function vendorPropName( style, name ) { + + // shortcut for names that are not vendor prefixed + if ( name in style ) { + return name; + } + + // check for vendor prefixed names + var capName = name.charAt(0).toUpperCase() + name.slice(1), + origName = name, + i = cssPrefixes.length; + + while ( i-- ) { + name = cssPrefixes[ i ] + capName; + if ( name in style ) { + return name; + } + } + + return origName; +} + +function isHidden( elem, el ) { + elem = el || elem; + return jQuery.css( elem, "display" ) === "none" || !jQuery.contains( elem.ownerDocument, elem ); +} + +function showHide( elements, show ) { + var elem, display, + values = [], + index = 0, + length = elements.length; + + for ( ; index < length; index++ ) { + elem = elements[ index ]; + if ( !elem.style ) { + continue; + } + values[ index ] = jQuery._data( elem, "olddisplay" ); + if ( show ) { + // Reset the inline display of this element to learn if it is + // being hidden by cascaded rules or not + if ( !values[ index ] && elem.style.display === "none" ) { + elem.style.display = ""; + } + + // Set elements which have been overridden with display: none + // in a stylesheet to whatever the default browser style is + // for such an element + if ( elem.style.display === "" && isHidden( elem ) ) { + values[ index ] = jQuery._data( elem, "olddisplay", css_defaultDisplay(elem.nodeName) ); + } + } else { + display = curCSS( elem, "display" ); + + if ( !values[ index ] && display !== "none" ) { + jQuery._data( elem, "olddisplay", display ); + } + } + } + + // Set the display of most of the elements in a second loop + // to avoid the constant reflow + for ( index = 0; index < length; index++ ) { + elem = elements[ index ]; + if ( 
!elem.style ) { + continue; + } + if ( !show || elem.style.display === "none" || elem.style.display === "" ) { + elem.style.display = show ? values[ index ] || "" : "none"; + } + } + + return elements; +} + +jQuery.fn.extend({ + css: function( name, value ) { + return jQuery.access( this, function( elem, name, value ) { + return value !== undefined ? + jQuery.style( elem, name, value ) : + jQuery.css( elem, name ); + }, name, value, arguments.length > 1 ); + }, + show: function() { + return showHide( this, true ); + }, + hide: function() { + return showHide( this ); + }, + toggle: function( state, fn2 ) { + var bool = typeof state === "boolean"; + + if ( jQuery.isFunction( state ) && jQuery.isFunction( fn2 ) ) { + return eventsToggle.apply( this, arguments ); + } + + return this.each(function() { + if ( bool ? state : isHidden( this ) ) { + jQuery( this ).show(); + } else { + jQuery( this ).hide(); + } + }); + } +}); + +jQuery.extend({ + // Add in style property hooks for overriding the default + // behavior of getting and setting a style property + cssHooks: { + opacity: { + get: function( elem, computed ) { + if ( computed ) { + // We should always get a number back from opacity + var ret = curCSS( elem, "opacity" ); + return ret === "" ? "1" : ret; + + } + } + } + }, + + // Exclude the following css properties to add px + cssNumber: { + "fillOpacity": true, + "fontWeight": true, + "lineHeight": true, + "opacity": true, + "orphans": true, + "widows": true, + "zIndex": true, + "zoom": true + }, + + // Add in properties whose names you wish to fix before + // setting or getting the value + cssProps: { + // normalize float css property + "float": jQuery.support.cssFloat ? "cssFloat" : "styleFloat" + }, + + // Get and set the style property on a DOM Node + style: function( elem, name, value, extra ) { + // Don't set styles on text and comment nodes + if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) { + return; + } + + // Make sure that we're working with the right name + var ret, type, hooks, + origName = jQuery.camelCase( name ), + style = elem.style; + + name = jQuery.cssProps[ origName ] || ( jQuery.cssProps[ origName ] = vendorPropName( style, origName ) ); + + // gets hook for the prefixed version + // followed by the unprefixed version + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // Check if we're setting a value + if ( value !== undefined ) { + type = typeof value; + + // convert relative number strings (+= or -=) to relative numbers. #7345 + if ( type === "string" && (ret = rrelNum.exec( value )) ) { + value = ( ret[1] + 1 ) * ret[2] + parseFloat( jQuery.css( elem, name ) ); + // Fixes bug #9237 + type = "number"; + } + + // Make sure that NaN and null values aren't set. 
See: #7116 + if ( value == null || type === "number" && isNaN( value ) ) { + return; + } + + // If a number was passed in, add 'px' to the (except for certain CSS properties) + if ( type === "number" && !jQuery.cssNumber[ origName ] ) { + value += "px"; + } + + // If a hook was provided, use that value, otherwise just set the specified value + if ( !hooks || !("set" in hooks) || (value = hooks.set( elem, value, extra )) !== undefined ) { + // Wrapped to prevent IE from throwing errors when 'invalid' values are provided + // Fixes bug #5509 + try { + style[ name ] = value; + } catch(e) {} + } + + } else { + // If a hook was provided get the non-computed value from there + if ( hooks && "get" in hooks && (ret = hooks.get( elem, false, extra )) !== undefined ) { + return ret; + } + + // Otherwise just get the value from the style object + return style[ name ]; + } + }, + + css: function( elem, name, numeric, extra ) { + var val, num, hooks, + origName = jQuery.camelCase( name ); + + // Make sure that we're working with the right name + name = jQuery.cssProps[ origName ] || ( jQuery.cssProps[ origName ] = vendorPropName( elem.style, origName ) ); + + // gets hook for the prefixed version + // followed by the unprefixed version + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // If a hook was provided get the computed value from there + if ( hooks && "get" in hooks ) { + val = hooks.get( elem, true, extra ); + } + + // Otherwise, if a way to get the computed value exists, use that + if ( val === undefined ) { + val = curCSS( elem, name ); + } + + //convert "normal" to computed value + if ( val === "normal" && name in cssNormalTransform ) { + val = cssNormalTransform[ name ]; + } + + // Return, converting to number if forced or a qualifier was provided and val looks numeric + if ( numeric || extra !== undefined ) { + num = parseFloat( val ); + return numeric || jQuery.isNumeric( num ) ? num || 0 : val; + } + return val; + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var ret, name, + old = {}; + + // Remember the old values, and insert the new ones + for ( name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + ret = callback.call( elem ); + + // Revert the old values + for ( name in options ) { + elem.style[ name ] = old[ name ]; + } + + return ret; + } +}); + +// NOTE: To any future maintainer, we've window.getComputedStyle +// because jsdom on node.js will break without it. 
+if ( window.getComputedStyle ) { + curCSS = function( elem, name ) { + var ret, width, minWidth, maxWidth, + computed = window.getComputedStyle( elem, null ), + style = elem.style; + + if ( computed ) { + + // getPropertyValue is only needed for .css('filter') in IE9, see #12537 + ret = computed.getPropertyValue( name ) || computed[ name ]; + + if ( ret === "" && !jQuery.contains( elem.ownerDocument, elem ) ) { + ret = jQuery.style( elem, name ); + } + + // A tribute to the "awesome hack by Dean Edwards" + // Chrome < 17 and Safari 5.0 uses "computed value" instead of "used value" for margin-right + // Safari 5.1.7 (at least) returns percentage for a larger set of values, but width seems to be reliably pixels + // this is against the CSSOM draft spec: http://dev.w3.org/csswg/cssom/#resolved-values + if ( rnumnonpx.test( ret ) && rmargin.test( name ) ) { + width = style.width; + minWidth = style.minWidth; + maxWidth = style.maxWidth; + + style.minWidth = style.maxWidth = style.width = ret; + ret = computed.width; + + style.width = width; + style.minWidth = minWidth; + style.maxWidth = maxWidth; + } + } + + return ret; + }; +} else if ( document.documentElement.currentStyle ) { + curCSS = function( elem, name ) { + var left, rsLeft, + ret = elem.currentStyle && elem.currentStyle[ name ], + style = elem.style; + + // Avoid setting ret to empty string here + // so we don't default to auto + if ( ret == null && style && style[ name ] ) { + ret = style[ name ]; + } + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + // but not position css attributes, as those are proportional to the parent element instead + // and we can't measure the parent instead because it might trigger a "stacking dolls" problem + if ( rnumnonpx.test( ret ) && !rposition.test( name ) ) { + + // Remember the original values + left = style.left; + rsLeft = elem.runtimeStyle && elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + if ( rsLeft ) { + elem.runtimeStyle.left = elem.currentStyle.left; + } + style.left = name === "fontSize" ? "1em" : ret; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + if ( rsLeft ) { + elem.runtimeStyle.left = rsLeft; + } + } + + return ret === "" ? "auto" : ret; + }; +} + +function setPositiveNumber( elem, value, subtract ) { + var matches = rnumsplit.exec( value ); + return matches ? + Math.max( 0, matches[ 1 ] - ( subtract || 0 ) ) + ( matches[ 2 ] || "px" ) : + value; +} + +function augmentWidthOrHeight( elem, name, extra, isBorderBox ) { + var i = extra === ( isBorderBox ? "border" : "content" ) ? + // If we already have the right measurement, avoid augmentation + 4 : + // Otherwise initialize for horizontal or vertical properties + name === "width" ? 1 : 0, + + val = 0; + + for ( ; i < 4; i += 2 ) { + // both box models exclude margin, so add it if we want it + if ( extra === "margin" ) { + // we use jQuery.css instead of curCSS here + // because of the reliableMarginRight CSS hook! 
+ val += jQuery.css( elem, extra + cssExpand[ i ], true ); + } + + // From this point on we use curCSS for maximum performance (relevant in animations) + if ( isBorderBox ) { + // border-box includes padding, so remove it if we want content + if ( extra === "content" ) { + val -= parseFloat( curCSS( elem, "padding" + cssExpand[ i ] ) ) || 0; + } + + // at this point, extra isn't border nor margin, so remove border + if ( extra !== "margin" ) { + val -= parseFloat( curCSS( elem, "border" + cssExpand[ i ] + "Width" ) ) || 0; + } + } else { + // at this point, extra isn't content, so add padding + val += parseFloat( curCSS( elem, "padding" + cssExpand[ i ] ) ) || 0; + + // at this point, extra isn't content nor padding, so add border + if ( extra !== "padding" ) { + val += parseFloat( curCSS( elem, "border" + cssExpand[ i ] + "Width" ) ) || 0; + } + } + } + + return val; +} + +function getWidthOrHeight( elem, name, extra ) { + + // Start with offset property, which is equivalent to the border-box value + var val = name === "width" ? elem.offsetWidth : elem.offsetHeight, + valueIsBorderBox = true, + isBorderBox = jQuery.support.boxSizing && jQuery.css( elem, "boxSizing" ) === "border-box"; + + // some non-html elements return undefined for offsetWidth, so check for null/undefined + // svg - https://bugzilla.mozilla.org/show_bug.cgi?id=649285 + // MathML - https://bugzilla.mozilla.org/show_bug.cgi?id=491668 + if ( val <= 0 || val == null ) { + // Fall back to computed then uncomputed css if necessary + val = curCSS( elem, name ); + if ( val < 0 || val == null ) { + val = elem.style[ name ]; + } + + // Computed unit is not pixels. Stop here and return. + if ( rnumnonpx.test(val) ) { + return val; + } + + // we need the check for style in case a browser which returns unreliable values + // for getComputedStyle silently falls back to the reliable elem.style + valueIsBorderBox = isBorderBox && ( jQuery.support.boxSizingReliable || val === elem.style[ name ] ); + + // Normalize "", auto, and prepare for extra + val = parseFloat( val ) || 0; + } + + // use the active box-sizing model to add/subtract irrelevant styles + return ( val + + augmentWidthOrHeight( + elem, + name, + extra || ( isBorderBox ? "border" : "content" ), + valueIsBorderBox + ) + ) + "px"; +} + + +// Try to determine the default display value of an element +function css_defaultDisplay( nodeName ) { + if ( elemdisplay[ nodeName ] ) { + return elemdisplay[ nodeName ]; + } + + var elem = jQuery( "<" + nodeName + ">" ).appendTo( document.body ), + display = elem.css("display"); + elem.remove(); + + // If the simple way fails, + // get element's real default display by attaching it to a temp iframe + if ( display === "none" || display === "" ) { + // Use the already-created iframe if possible + iframe = document.body.appendChild( + iframe || jQuery.extend( document.createElement("iframe"), { + frameBorder: 0, + width: 0, + height: 0 + }) + ); + + // Create a cacheable copy of the iframe document on first call. + // IE and Opera will allow us to reuse the iframeDoc without re-writing the fake HTML + // document to it; WebKit & Firefox won't allow reusing the iframe document. 
+ if ( !iframeDoc || !iframe.createElement ) { + iframeDoc = ( iframe.contentWindow || iframe.contentDocument ).document; + iframeDoc.write("<!doctype html><html><body>"); + iframeDoc.close(); + } + + elem = iframeDoc.body.appendChild( iframeDoc.createElement(nodeName) ); + + display = curCSS( elem, "display" ); + document.body.removeChild( iframe ); + } + + // Store the correct default display + elemdisplay[ nodeName ] = display; + + return display; +} + +jQuery.each([ "height", "width" ], function( i, name ) { + jQuery.cssHooks[ name ] = { + get: function( elem, computed, extra ) { + if ( computed ) { + // certain elements can have dimension info if we invisibly show them + // however, it must have a current display style that would benefit from this + if ( elem.offsetWidth === 0 && rdisplayswap.test( curCSS( elem, "display" ) ) ) { + return jQuery.swap( elem, cssShow, function() { + return getWidthOrHeight( elem, name, extra ); + }); + } else { + return getWidthOrHeight( elem, name, extra ); + } + } + }, + + set: function( elem, value, extra ) { + return setPositiveNumber( elem, value, extra ? + augmentWidthOrHeight( + elem, + name, + extra, + jQuery.support.boxSizing && jQuery.css( elem, "boxSizing" ) === "border-box" + ) : 0 + ); + } + }; +}); + +if ( !jQuery.support.opacity ) { + jQuery.cssHooks.opacity = { + get: function( elem, computed ) { + // IE uses filters for opacity + return ropacity.test( (computed && elem.currentStyle ? elem.currentStyle.filter : elem.style.filter) || "" ) ? + ( 0.01 * parseFloat( RegExp.$1 ) ) + "" : + computed ? "1" : ""; + }, + + set: function( elem, value ) { + var style = elem.style, + currentStyle = elem.currentStyle, + opacity = jQuery.isNumeric( value ) ? "alpha(opacity=" + value * 100 + ")" : "", + filter = currentStyle && currentStyle.filter || style.filter || ""; + + // IE has trouble with opacity if it does not have layout + // Force it by setting the zoom level + style.zoom = 1; + + // if setting opacity to 1, and no other filters exist - attempt to remove filter attribute #6652 + if ( value >= 1 && jQuery.trim( filter.replace( ralpha, "" ) ) === "" && + style.removeAttribute ) { + + // Setting style.filter to null, "" & " " still leave "filter:" in the cssText + // if "filter:" is present at all, clearType is disabled, we want to avoid this + // style.removeAttribute is IE Only, but so apparently is this code path... + style.removeAttribute( "filter" ); + + // if there there is no filter style applied in a css rule, we are done + if ( currentStyle && !currentStyle.filter ) { + return; + } + } + + // otherwise, set new filter values + style.filter = ralpha.test( filter ) ? 
+ filter.replace( ralpha, opacity ) : + filter + " " + opacity; + } + }; +} + +// These hooks cannot be added until DOM ready because the support test +// for it is not run until after DOM ready +jQuery(function() { + if ( !jQuery.support.reliableMarginRight ) { + jQuery.cssHooks.marginRight = { + get: function( elem, computed ) { + // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right + // Work around by temporarily setting element display to inline-block + return jQuery.swap( elem, { "display": "inline-block" }, function() { + if ( computed ) { + return curCSS( elem, "marginRight" ); + } + }); + } + }; + } + + // Webkit bug: https://bugs.webkit.org/show_bug.cgi?id=29084 + // getComputedStyle returns percent when specified for top/left/bottom/right + // rather than make the css module depend on the offset module, we just check for it here + if ( !jQuery.support.pixelPosition && jQuery.fn.position ) { + jQuery.each( [ "top", "left" ], function( i, prop ) { + jQuery.cssHooks[ prop ] = { + get: function( elem, computed ) { + if ( computed ) { + var ret = curCSS( elem, prop ); + // if curCSS returns percentage, fallback to offset + return rnumnonpx.test( ret ) ? jQuery( elem ).position()[ prop ] + "px" : ret; + } + } + }; + }); + } + +}); + +if ( jQuery.expr && jQuery.expr.filters ) { + jQuery.expr.filters.hidden = function( elem ) { + return ( elem.offsetWidth === 0 && elem.offsetHeight === 0 ) || (!jQuery.support.reliableHiddenOffsets && ((elem.style && elem.style.display) || curCSS( elem, "display" )) === "none"); + }; + + jQuery.expr.filters.visible = function( elem ) { + return !jQuery.expr.filters.hidden( elem ); + }; +} + +// These hooks are used by animate to expand properties +jQuery.each({ + margin: "", + padding: "", + border: "Width" +}, function( prefix, suffix ) { + jQuery.cssHooks[ prefix + suffix ] = { + expand: function( value ) { + var i, + + // assumes a single number if not a string + parts = typeof value === "string" ? value.split(" ") : [ value ], + expanded = {}; + + for ( i = 0; i < 4; i++ ) { + expanded[ prefix + cssExpand[ i ] + suffix ] = + parts[ i ] || parts[ i - 2 ] || parts[ 0 ]; + } + + return expanded; + } + }; + + if ( !rmargin.test( prefix ) ) { + jQuery.cssHooks[ prefix + suffix ].set = setPositiveNumber; + } +}); +var r20 = /%20/g, + rbracket = /\[\]$/, + rCRLF = /\r?\n/g, + rinput = /^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i, + rselectTextarea = /^(?:select|textarea)/i; + +jQuery.fn.extend({ + serialize: function() { + return jQuery.param( this.serializeArray() ); + }, + serializeArray: function() { + return this.map(function(){ + return this.elements ? jQuery.makeArray( this.elements ) : this; + }) + .filter(function(){ + return this.name && !this.disabled && + ( this.checked || rselectTextarea.test( this.nodeName ) || + rinput.test( this.type ) ); + }) + .map(function( i, elem ){ + var val = jQuery( this ).val(); + + return val == null ? + null : + jQuery.isArray( val ) ? + jQuery.map( val, function( val, i ){ + return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + }) : + { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + }).get(); + } +}); + +//Serialize an array of form elements or a set of +//key/values into a query string +jQuery.param = function( a, traditional ) { + var prefix, + s = [], + add = function( key, value ) { + // If value is a function, invoke it and return its value + value = jQuery.isFunction( value ) ? 
value() : ( value == null ? "" : value ); + s[ s.length ] = encodeURIComponent( key ) + "=" + encodeURIComponent( value ); + }; + + // Set traditional to true for jQuery <= 1.3.2 behavior. + if ( traditional === undefined ) { + traditional = jQuery.ajaxSettings && jQuery.ajaxSettings.traditional; + } + + // If an array was passed in, assume that it is an array of form elements. + if ( jQuery.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) { + // Serialize the form elements + jQuery.each( a, function() { + add( this.name, this.value ); + }); + + } else { + // If traditional, encode the "old" way (the way 1.3.2 or older + // did it), otherwise encode params recursively. + for ( prefix in a ) { + buildParams( prefix, a[ prefix ], traditional, add ); + } + } + + // Return the resulting serialization + return s.join( "&" ).replace( r20, "+" ); +}; + +function buildParams( prefix, obj, traditional, add ) { + var name; + + if ( jQuery.isArray( obj ) ) { + // Serialize array item. + jQuery.each( obj, function( i, v ) { + if ( traditional || rbracket.test( prefix ) ) { + // Treat each array item as a scalar. + add( prefix, v ); + + } else { + // If array item is non-scalar (array or object), encode its + // numeric index to resolve deserialization ambiguity issues. + // Note that rack (as of 1.0.0) can't currently deserialize + // nested arrays properly, and attempting to do so may cause + // a server error. Possible fixes are to modify rack's + // deserialization algorithm or to provide an option or flag + // to force array serialization to be shallow. + buildParams( prefix + "[" + ( typeof v === "object" ? i : "" ) + "]", v, traditional, add ); + } + }); + + } else if ( !traditional && jQuery.type( obj ) === "object" ) { + // Serialize object item. + for ( name in obj ) { + buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add ); + } + + } else { + // Serialize scalar item. 
+ add( prefix, obj ); + } +} +var + // Document location + ajaxLocParts, + ajaxLocation, + + rhash = /#.*$/, + rheaders = /^(.*?):[ \t]*([^\r\n]*)\r?$/mg, // IE leaves an \r character at EOL + // #7653, #8125, #8152: local protocol detection + rlocalProtocol = /^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/, + rnoContent = /^(?:GET|HEAD)$/, + rprotocol = /^\/\//, + rquery = /\?/, + rscript = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, + rts = /([?&])_=[^&]*/, + rurl = /^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+)|)|)/, + + // Keep a copy of the old load method + _load = jQuery.fn.load, + + /* Prefilters + * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example) + * 2) These are called: + * - BEFORE asking for a transport + * - AFTER param serialization (s.data is a string if s.processData is true) + * 3) key is the dataType + * 4) the catchall symbol "*" can be used + * 5) execution will start with transport dataType and THEN continue down to "*" if needed + */ + prefilters = {}, + + /* Transports bindings + * 1) key is the dataType + * 2) the catchall symbol "*" can be used + * 3) selection will start with transport dataType and THEN go to "*" if needed + */ + transports = {}, + + // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression + allTypes = ["*/"] + ["*"]; + +// #8138, IE may throw an exception when accessing +// a field from window.location if document.domain has been set +try { + ajaxLocation = location.href; +} catch( e ) { + // Use the href attribute of an A element + // since IE will modify it given document.location + ajaxLocation = document.createElement( "a" ); + ajaxLocation.href = ""; + ajaxLocation = ajaxLocation.href; +} + +// Segment location into parts +ajaxLocParts = rurl.exec( ajaxLocation.toLowerCase() ) || []; + +// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport +function addToPrefiltersOrTransports( structure ) { + + // dataTypeExpression is optional and defaults to "*" + return function( dataTypeExpression, func ) { + + if ( typeof dataTypeExpression !== "string" ) { + func = dataTypeExpression; + dataTypeExpression = "*"; + } + + var dataType, list, placeBefore, + dataTypes = dataTypeExpression.toLowerCase().split( core_rspace ), + i = 0, + length = dataTypes.length; + + if ( jQuery.isFunction( func ) ) { + // For each dataType in the dataTypeExpression + for ( ; i < length; i++ ) { + dataType = dataTypes[ i ]; + // We control if we're asked to add before + // any existing element + placeBefore = /^\+/.test( dataType ); + if ( placeBefore ) { + dataType = dataType.substr( 1 ) || "*"; + } + list = structure[ dataType ] = structure[ dataType ] || []; + // then we add to the structure accordingly + list[ placeBefore ? "unshift" : "push" ]( func ); + } + } + }; +} + +// Base inspection function for prefilters and transports +function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR, + dataType /* internal */, inspected /* internal */ ) { + + dataType = dataType || options.dataTypes[ 0 ]; + inspected = inspected || {}; + + inspected[ dataType ] = true; + + var selection, + list = structure[ dataType ], + i = 0, + length = list ? 
list.length : 0, + executeOnly = ( structure === prefilters ); + + for ( ; i < length && ( executeOnly || !selection ); i++ ) { + selection = list[ i ]( options, originalOptions, jqXHR ); + // If we got redirected to another dataType + // we try there if executing only and not done already + if ( typeof selection === "string" ) { + if ( !executeOnly || inspected[ selection ] ) { + selection = undefined; + } else { + options.dataTypes.unshift( selection ); + selection = inspectPrefiltersOrTransports( + structure, options, originalOptions, jqXHR, selection, inspected ); + } + } + } + // If we're only executing or nothing was selected + // we try the catchall dataType if not done already + if ( ( executeOnly || !selection ) && !inspected[ "*" ] ) { + selection = inspectPrefiltersOrTransports( + structure, options, originalOptions, jqXHR, "*", inspected ); + } + // unnecessary when only executing (prefilters) + // but it'll be ignored by the caller in that case + return selection; +} + +// A special extend for ajax options +// that takes "flat" options (not to be deep extended) +// Fixes #9887 +function ajaxExtend( target, src ) { + var key, deep, + flatOptions = jQuery.ajaxSettings.flatOptions || {}; + for ( key in src ) { + if ( src[ key ] !== undefined ) { + ( flatOptions[ key ] ? target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ]; + } + } + if ( deep ) { + jQuery.extend( true, target, deep ); + } +} + +jQuery.fn.load = function( url, params, callback ) { + if ( typeof url !== "string" && _load ) { + return _load.apply( this, arguments ); + } + + // Don't do a request if no elements are being requested + if ( !this.length ) { + return this; + } + + var selector, type, response, + self = this, + off = url.indexOf(" "); + + if ( off >= 0 ) { + selector = url.slice( off, url.length ); + url = url.slice( 0, off ); + } + + // If it's a function + if ( jQuery.isFunction( params ) ) { + + // We assume that it's the callback + callback = params; + params = undefined; + + // Otherwise, build a param string + } else if ( params && typeof params === "object" ) { + type = "POST"; + } + + // Request the remote document + jQuery.ajax({ + url: url, + + // if "type" variable is undefined, then "GET" method will be used + type: type, + dataType: "html", + data: params, + complete: function( jqXHR, status ) { + if ( callback ) { + self.each( callback, response || [ jqXHR.responseText, status, jqXHR ] ); + } + } + }).done(function( responseText ) { + + // Save response for use in complete callback + response = arguments; + + // See if a selector was specified + self.html( selector ? 
+ + // Create a dummy div to hold the results + jQuery("<div>") + + // inject the contents of the document in, removing the scripts + // to avoid any 'Permission Denied' errors in IE + .append( responseText.replace( rscript, "" ) ) + + // Locate the specified elements + .find( selector ) : + + // If not, just inject the full result + responseText ); + + }); + + return this; +}; + +// Attach a bunch of functions for handling common AJAX events +jQuery.each( "ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split( " " ), function( i, o ){ + jQuery.fn[ o ] = function( f ){ + return this.on( o, f ); + }; +}); + +jQuery.each( [ "get", "post" ], function( i, method ) { + jQuery[ method ] = function( url, data, callback, type ) { + // shift arguments if data argument was omitted + if ( jQuery.isFunction( data ) ) { + type = type || callback; + callback = data; + data = undefined; + } + + return jQuery.ajax({ + type: method, + url: url, + data: data, + success: callback, + dataType: type + }); + }; +}); + +jQuery.extend({ + + getScript: function( url, callback ) { + return jQuery.get( url, undefined, callback, "script" ); + }, + + getJSON: function( url, data, callback ) { + return jQuery.get( url, data, callback, "json" ); + }, + + // Creates a full fledged settings object into target + // with both ajaxSettings and settings fields. + // If target is omitted, writes into ajaxSettings. + ajaxSetup: function( target, settings ) { + if ( settings ) { + // Building a settings object + ajaxExtend( target, jQuery.ajaxSettings ); + } else { + // Extending ajaxSettings + settings = target; + target = jQuery.ajaxSettings; + } + ajaxExtend( target, settings ); + return target; + }, + + ajaxSettings: { + url: ajaxLocation, + isLocal: rlocalProtocol.test( ajaxLocParts[ 1 ] ), + global: true, + type: "GET", + contentType: "application/x-www-form-urlencoded; charset=UTF-8", + processData: true, + async: true, + /* + timeout: 0, + data: null, + dataType: null, + username: null, + password: null, + cache: null, + throws: false, + traditional: false, + headers: {}, + */ + + accepts: { + xml: "application/xml, text/xml", + html: "text/html", + text: "text/plain", + json: "application/json, text/javascript", + "*": allTypes + }, + + contents: { + xml: /xml/, + html: /html/, + json: /json/ + }, + + responseFields: { + xml: "responseXML", + text: "responseText" + }, + + // List of data converters + // 1) key format is "source_type destination_type" (a single space in-between) + // 2) the catchall symbol "*" can be used for source_type + converters: { + + // Convert anything to text + "* text": window.String, + + // Text to html (true = no transformation) + "text html": true, + + // Evaluate text as a json expression + "text json": jQuery.parseJSON, + + // Parse text as xml + "text xml": jQuery.parseXML + }, + + // For options that shouldn't be deep extended: + // you can add your own custom options here if + // and when you create one that shouldn't be + // deep extended (see ajaxExtend) + flatOptions: { + context: true, + url: true + } + }, + + ajaxPrefilter: addToPrefiltersOrTransports( prefilters ), + ajaxTransport: addToPrefiltersOrTransports( transports ), + + // Main method + ajax: function( url, options ) { + + // If url is an object, simulate pre-1.5 signature + if ( typeof url === "object" ) { + options = url; + url = undefined; + } + + // Force options to be an object + options = options || {}; + + var // ifModified key + ifModifiedKey, + // Response headers + responseHeadersString, + 
responseHeaders, + // transport + transport, + // timeout handle + timeoutTimer, + // Cross-domain detection vars + parts, + // To know if global events are to be dispatched + fireGlobals, + // Loop variable + i, + // Create the final options object + s = jQuery.ajaxSetup( {}, options ), + // Callbacks context + callbackContext = s.context || s, + // Context for global events + // It's the callbackContext if one was provided in the options + // and if it's a DOM node or a jQuery collection + globalEventContext = callbackContext !== s && + ( callbackContext.nodeType || callbackContext instanceof jQuery ) ? + jQuery( callbackContext ) : jQuery.event, + // Deferreds + deferred = jQuery.Deferred(), + completeDeferred = jQuery.Callbacks( "once memory" ), + // Status-dependent callbacks + statusCode = s.statusCode || {}, + // Headers (they are sent all at once) + requestHeaders = {}, + requestHeadersNames = {}, + // The jqXHR state + state = 0, + // Default abort message + strAbort = "canceled", + // Fake xhr + jqXHR = { + + readyState: 0, + + // Caches the header + setRequestHeader: function( name, value ) { + if ( !state ) { + var lname = name.toLowerCase(); + name = requestHeadersNames[ lname ] = requestHeadersNames[ lname ] || name; + requestHeaders[ name ] = value; + } + return this; + }, + + // Raw string + getAllResponseHeaders: function() { + return state === 2 ? responseHeadersString : null; + }, + + // Builds headers hashtable if needed + getResponseHeader: function( key ) { + var match; + if ( state === 2 ) { + if ( !responseHeaders ) { + responseHeaders = {}; + while( ( match = rheaders.exec( responseHeadersString ) ) ) { + responseHeaders[ match[1].toLowerCase() ] = match[ 2 ]; + } + } + match = responseHeaders[ key.toLowerCase() ]; + } + return match === undefined ? null : match; + }, + + // Overrides response content-type header + overrideMimeType: function( type ) { + if ( !state ) { + s.mimeType = type; + } + return this; + }, + + // Cancel the request + abort: function( statusText ) { + statusText = statusText || strAbort; + if ( transport ) { + transport.abort( statusText ); + } + done( 0, statusText ); + return this; + } + }; + + // Callback for when everything is done + // It is defined here because jslint complains if it is declared + // at the end of the function (which would be more logical and readable) + function done( status, nativeStatusText, responses, headers ) { + var isSuccess, success, error, response, modified, + statusText = nativeStatusText; + + // Called once + if ( state === 2 ) { + return; + } + + // State is "done" now + state = 2; + + // Clear timeout if it exists + if ( timeoutTimer ) { + clearTimeout( timeoutTimer ); + } + + // Dereference transport for early garbage collection + // (no matter how long the jqXHR object will be used) + transport = undefined; + + // Cache response headers + responseHeadersString = headers || ""; + + // Set readyState + jqXHR.readyState = status > 0 ? 4 : 0; + + // Get response data + if ( responses ) { + response = ajaxHandleResponses( s, jqXHR, responses ); + } + + // If successful, handle type chaining + if ( status >= 200 && status < 300 || status === 304 ) { + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. 
+ if ( s.ifModified ) { + + modified = jqXHR.getResponseHeader("Last-Modified"); + if ( modified ) { + jQuery.lastModified[ ifModifiedKey ] = modified; + } + modified = jqXHR.getResponseHeader("Etag"); + if ( modified ) { + jQuery.etag[ ifModifiedKey ] = modified; + } + } + + // If not modified + if ( status === 304 ) { + + statusText = "notmodified"; + isSuccess = true; + + // If we have data + } else { + + isSuccess = ajaxConvert( s, response ); + statusText = isSuccess.state; + success = isSuccess.data; + error = isSuccess.error; + isSuccess = !error; + } + } else { + // We extract error from statusText + // then normalize statusText and status for non-aborts + error = statusText; + if ( !statusText || status ) { + statusText = "error"; + if ( status < 0 ) { + status = 0; + } + } + } + + // Set data for the fake xhr object + jqXHR.status = status; + jqXHR.statusText = ( nativeStatusText || statusText ) + ""; + + // Success/Error + if ( isSuccess ) { + deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] ); + } else { + deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] ); + } + + // Status-dependent callbacks + jqXHR.statusCode( statusCode ); + statusCode = undefined; + + if ( fireGlobals ) { + globalEventContext.trigger( "ajax" + ( isSuccess ? "Success" : "Error" ), + [ jqXHR, s, isSuccess ? success : error ] ); + } + + // Complete + completeDeferred.fireWith( callbackContext, [ jqXHR, statusText ] ); + + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] ); + // Handle the global AJAX counter + if ( !( --jQuery.active ) ) { + jQuery.event.trigger( "ajaxStop" ); + } + } + } + + // Attach deferreds + deferred.promise( jqXHR ); + jqXHR.success = jqXHR.done; + jqXHR.error = jqXHR.fail; + jqXHR.complete = completeDeferred.add; + + // Status-dependent callbacks + jqXHR.statusCode = function( map ) { + if ( map ) { + var tmp; + if ( state < 2 ) { + for ( tmp in map ) { + statusCode[ tmp ] = [ statusCode[tmp], map[tmp] ]; + } + } else { + tmp = map[ jqXHR.status ]; + jqXHR.always( tmp ); + } + } + return this; + }; + + // Remove hash character (#7531: and string promotion) + // Add protocol if not provided (#5866: IE7 issue with protocol-less urls) + // We also use the url parameter if available + s.url = ( ( url || s.url ) + "" ).replace( rhash, "" ).replace( rprotocol, ajaxLocParts[ 1 ] + "//" ); + + // Extract dataTypes list + s.dataTypes = jQuery.trim( s.dataType || "*" ).toLowerCase().split( core_rspace ); + + // A cross-domain request is in order when we have a protocol:host:port mismatch + if ( s.crossDomain == null ) { + parts = rurl.exec( s.url.toLowerCase() ); + s.crossDomain = !!( parts && + ( parts[ 1 ] !== ajaxLocParts[ 1 ] || parts[ 2 ] !== ajaxLocParts[ 2 ] || + ( parts[ 3 ] || ( parts[ 1 ] === "http:" ? 80 : 443 ) ) != + ( ajaxLocParts[ 3 ] || ( ajaxLocParts[ 1 ] === "http:" ? 
80 : 443 ) ) ) + ); + } + + // Convert data if not already a string + if ( s.data && s.processData && typeof s.data !== "string" ) { + s.data = jQuery.param( s.data, s.traditional ); + } + + // Apply prefilters + inspectPrefiltersOrTransports( prefilters, s, options, jqXHR ); + + // If request was aborted inside a prefilter, stop there + if ( state === 2 ) { + return jqXHR; + } + + // We can fire global events as of now if asked to + fireGlobals = s.global; + + // Uppercase the type + s.type = s.type.toUpperCase(); + + // Determine if request has content + s.hasContent = !rnoContent.test( s.type ); + + // Watch for a new set of requests + if ( fireGlobals && jQuery.active++ === 0 ) { + jQuery.event.trigger( "ajaxStart" ); + } + + // More options handling for requests with no content + if ( !s.hasContent ) { + + // If data is available, append data to url + if ( s.data ) { + s.url += ( rquery.test( s.url ) ? "&" : "?" ) + s.data; + // #9682: remove data so that it's not used in an eventual retry + delete s.data; + } + + // Get ifModifiedKey before adding the anti-cache parameter + ifModifiedKey = s.url; + + // Add anti-cache in url if needed + if ( s.cache === false ) { + + var ts = jQuery.now(), + // try replacing _= if it is there + ret = s.url.replace( rts, "$1_=" + ts ); + + // if nothing was replaced, add timestamp to the end + s.url = ret + ( ( ret === s.url ) ? ( rquery.test( s.url ) ? "&" : "?" ) + "_=" + ts : "" ); + } + } + + // Set the correct header, if data is being sent + if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) { + jqXHR.setRequestHeader( "Content-Type", s.contentType ); + } + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. + if ( s.ifModified ) { + ifModifiedKey = ifModifiedKey || s.url; + if ( jQuery.lastModified[ ifModifiedKey ] ) { + jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ ifModifiedKey ] ); + } + if ( jQuery.etag[ ifModifiedKey ] ) { + jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ ifModifiedKey ] ); + } + } + + // Set the Accepts header for the server, depending on the dataType + jqXHR.setRequestHeader( + "Accept", + s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[0] ] ? + s.accepts[ s.dataTypes[0] ] + ( s.dataTypes[ 0 ] !== "*" ? 
", " + allTypes + "; q=0.01" : "" ) : + s.accepts[ "*" ] + ); + + // Check for headers option + for ( i in s.headers ) { + jqXHR.setRequestHeader( i, s.headers[ i ] ); + } + + // Allow custom headers/mimetypes and early abort + if ( s.beforeSend && ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || state === 2 ) ) { + // Abort if not done already and return + return jqXHR.abort(); + + } + + // aborting is no longer a cancellation + strAbort = "abort"; + + // Install callbacks on deferreds + for ( i in { success: 1, error: 1, complete: 1 } ) { + jqXHR[ i ]( s[ i ] ); + } + + // Get transport + transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR ); + + // If no transport, we auto-abort + if ( !transport ) { + done( -1, "No Transport" ); + } else { + jqXHR.readyState = 1; + // Send global event + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] ); + } + // Timeout + if ( s.async && s.timeout > 0 ) { + timeoutTimer = setTimeout( function(){ + jqXHR.abort( "timeout" ); + }, s.timeout ); + } + + try { + state = 1; + transport.send( requestHeaders, done ); + } catch (e) { + // Propagate exception as error if not done + if ( state < 2 ) { + done( -1, e ); + // Simply rethrow otherwise + } else { + throw e; + } + } + } + + return jqXHR; + }, + + // Counter for holding the number of active queries + active: 0, + + // Last-Modified header cache for next request + lastModified: {}, + etag: {} + +}); + +/* Handles responses to an ajax request: + * - sets all responseXXX fields accordingly + * - finds the right dataType (mediates between content-type and expected dataType) + * - returns the corresponding response + */ +function ajaxHandleResponses( s, jqXHR, responses ) { + + var ct, type, finalDataType, firstDataType, + contents = s.contents, + dataTypes = s.dataTypes, + responseFields = s.responseFields; + + // Fill responseXXX fields + for ( type in responseFields ) { + if ( type in responses ) { + jqXHR[ responseFields[type] ] = responses[ type ]; + } + } + + // Remove auto dataType and get content-type in the process + while( dataTypes[ 0 ] === "*" ) { + dataTypes.shift(); + if ( ct === undefined ) { + ct = s.mimeType || jqXHR.getResponseHeader( "content-type" ); + } + } + + // Check if we're dealing with a known content-type + if ( ct ) { + for ( type in contents ) { + if ( contents[ type ] && contents[ type ].test( ct ) ) { + dataTypes.unshift( type ); + break; + } + } + } + + // Check to see if we have a response for the expected dataType + if ( dataTypes[ 0 ] in responses ) { + finalDataType = dataTypes[ 0 ]; + } else { + // Try convertible dataTypes + for ( type in responses ) { + if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[0] ] ) { + finalDataType = type; + break; + } + if ( !firstDataType ) { + firstDataType = type; + } + } + // Or just use first one + finalDataType = finalDataType || firstDataType; + } + + // If we found a dataType + // We add the dataType to the list if needed + // and return the corresponding response + if ( finalDataType ) { + if ( finalDataType !== dataTypes[ 0 ] ) { + dataTypes.unshift( finalDataType ); + } + return responses[ finalDataType ]; + } +} + +// Chain conversions given the request and the original response +function ajaxConvert( s, response ) { + + var conv, conv2, current, tmp, + // Work with a copy of dataTypes in case we need to modify it for conversion + dataTypes = s.dataTypes.slice(), + prev = dataTypes[ 0 ], + converters = {}, + i = 0; + + // Apply the dataFilter if provided + 
if ( s.dataFilter ) { + response = s.dataFilter( response, s.dataType ); + } + + // Create converters map with lowercased keys + if ( dataTypes[ 1 ] ) { + for ( conv in s.converters ) { + converters[ conv.toLowerCase() ] = s.converters[ conv ]; + } + } + + // Convert to each sequential dataType, tolerating list modification + for ( ; (current = dataTypes[++i]); ) { + + // There's only work to do if current dataType is non-auto + if ( current !== "*" ) { + + // Convert response if prev dataType is non-auto and differs from current + if ( prev !== "*" && prev !== current ) { + + // Seek a direct converter + conv = converters[ prev + " " + current ] || converters[ "* " + current ]; + + // If none found, seek a pair + if ( !conv ) { + for ( conv2 in converters ) { + + // If conv2 outputs current + tmp = conv2.split(" "); + if ( tmp[ 1 ] === current ) { + + // If prev can be converted to accepted input + conv = converters[ prev + " " + tmp[ 0 ] ] || + converters[ "* " + tmp[ 0 ] ]; + if ( conv ) { + // Condense equivalence converters + if ( conv === true ) { + conv = converters[ conv2 ]; + + // Otherwise, insert the intermediate dataType + } else if ( converters[ conv2 ] !== true ) { + current = tmp[ 0 ]; + dataTypes.splice( i--, 0, current ); + } + + break; + } + } + } + } + + // Apply converter (if not an equivalence) + if ( conv !== true ) { + + // Unless errors are allowed to bubble, catch and return them + if ( conv && s["throws"] ) { + response = conv( response ); + } else { + try { + response = conv( response ); + } catch ( e ) { + return { state: "parsererror", error: conv ? e : "No conversion from " + prev + " to " + current }; + } + } + } + } + + // Update prev for next iteration + prev = current; + } + } + + return { state: "success", data: response }; +} +var oldCallbacks = [], + rquestion = /\?/, + rjsonp = /(=)\?(?=&|$)|\?\?/, + nonce = jQuery.now(); + +// Default jsonp settings +jQuery.ajaxSetup({ + jsonp: "callback", + jsonpCallback: function() { + var callback = oldCallbacks.pop() || ( jQuery.expando + "_" + ( nonce++ ) ); + this[ callback ] = true; + return callback; + } +}); + +// Detect, normalize options and install callbacks for jsonp requests +jQuery.ajaxPrefilter( "json jsonp", function( s, originalSettings, jqXHR ) { + + var callbackName, overwritten, responseContainer, + data = s.data, + url = s.url, + hasCallback = s.jsonp !== false, + replaceInUrl = hasCallback && rjsonp.test( url ), + replaceInData = hasCallback && !replaceInUrl && typeof data === "string" && + !( s.contentType || "" ).indexOf("application/x-www-form-urlencoded") && + rjsonp.test( data ); + + // Handle iff the expected data type is "jsonp" or we have a parameter to set + if ( s.dataTypes[ 0 ] === "jsonp" || replaceInUrl || replaceInData ) { + + // Get callback name, remembering preexisting value associated with it + callbackName = s.jsonpCallback = jQuery.isFunction( s.jsonpCallback ) ? + s.jsonpCallback() : + s.jsonpCallback; + overwritten = window[ callbackName ]; + + // Insert callback into url or form data + if ( replaceInUrl ) { + s.url = url.replace( rjsonp, "$1" + callbackName ); + } else if ( replaceInData ) { + s.data = data.replace( rjsonp, "$1" + callbackName ); + } else if ( hasCallback ) { + s.url += ( rquestion.test( url ) ? "&" : "?" 
) + s.jsonp + "=" + callbackName; + } + + // Use data converter to retrieve json after script execution + s.converters["script json"] = function() { + if ( !responseContainer ) { + jQuery.error( callbackName + " was not called" ); + } + return responseContainer[ 0 ]; + }; + + // force json dataType + s.dataTypes[ 0 ] = "json"; + + // Install callback + window[ callbackName ] = function() { + responseContainer = arguments; + }; + + // Clean-up function (fires after converters) + jqXHR.always(function() { + // Restore preexisting value + window[ callbackName ] = overwritten; + + // Save back as free + if ( s[ callbackName ] ) { + // make sure that re-using the options doesn't screw things around + s.jsonpCallback = originalSettings.jsonpCallback; + + // save the callback name for future use + oldCallbacks.push( callbackName ); + } + + // Call if it was a function and we have a response + if ( responseContainer && jQuery.isFunction( overwritten ) ) { + overwritten( responseContainer[ 0 ] ); + } + + responseContainer = overwritten = undefined; + }); + + // Delegate to script + return "script"; + } +}); +// Install script dataType +jQuery.ajaxSetup({ + accepts: { + script: "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript" + }, + contents: { + script: /javascript|ecmascript/ + }, + converters: { + "text script": function( text ) { + jQuery.globalEval( text ); + return text; + } + } +}); + +// Handle cache's special case and global +jQuery.ajaxPrefilter( "script", function( s ) { + if ( s.cache === undefined ) { + s.cache = false; + } + if ( s.crossDomain ) { + s.type = "GET"; + s.global = false; + } +}); + +// Bind script tag hack transport +jQuery.ajaxTransport( "script", function(s) { + + // This transport only deals with cross domain requests + if ( s.crossDomain ) { + + var script, + head = document.head || document.getElementsByTagName( "head" )[0] || document.documentElement; + + return { + + send: function( _, callback ) { + + script = document.createElement( "script" ); + + script.async = "async"; + + if ( s.scriptCharset ) { + script.charset = s.scriptCharset; + } + + script.src = s.url; + + // Attach handlers for all browsers + script.onload = script.onreadystatechange = function( _, isAbort ) { + + if ( isAbort || !script.readyState || /loaded|complete/.test( script.readyState ) ) { + + // Handle memory leak in IE + script.onload = script.onreadystatechange = null; + + // Remove the script + if ( head && script.parentNode ) { + head.removeChild( script ); + } + + // Dereference the script + script = undefined; + + // Callback if not abort + if ( !isAbort ) { + callback( 200, "success" ); + } + } + }; + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709 and #4378). + head.insertBefore( script, head.firstChild ); + }, + + abort: function() { + if ( script ) { + script.onload( 0, 1 ); + } + } + }; + } +}); +var xhrCallbacks, + // #5280: Internet Explorer will keep connections alive if we don't abort on unload + xhrOnUnloadAbort = window.ActiveXObject ? 
function() { + // Abort all pending requests + for ( var key in xhrCallbacks ) { + xhrCallbacks[ key ]( 0, 1 ); + } + } : false, + xhrId = 0; + +// Functions to create xhrs +function createStandardXHR() { + try { + return new window.XMLHttpRequest(); + } catch( e ) {} +} + +function createActiveXHR() { + try { + return new window.ActiveXObject( "Microsoft.XMLHTTP" ); + } catch( e ) {} +} + +// Create the request object +// (This is still attached to ajaxSettings for backward compatibility) +jQuery.ajaxSettings.xhr = window.ActiveXObject ? + /* Microsoft failed to properly + * implement the XMLHttpRequest in IE7 (can't request local files), + * so we use the ActiveXObject when it is available + * Additionally XMLHttpRequest can be disabled in IE7/IE8 so + * we need a fallback. + */ + function() { + return !this.isLocal && createStandardXHR() || createActiveXHR(); + } : + // For all other browsers, use the standard XMLHttpRequest object + createStandardXHR; + +// Determine support properties +(function( xhr ) { + jQuery.extend( jQuery.support, { + ajax: !!xhr, + cors: !!xhr && ( "withCredentials" in xhr ) + }); +})( jQuery.ajaxSettings.xhr() ); + +// Create transport if the browser can provide an xhr +if ( jQuery.support.ajax ) { + + jQuery.ajaxTransport(function( s ) { + // Cross domain only allowed if supported through XMLHttpRequest + if ( !s.crossDomain || jQuery.support.cors ) { + + var callback; + + return { + send: function( headers, complete ) { + + // Get a new xhr + var handle, i, + xhr = s.xhr(); + + // Open the socket + // Passing null username, generates a login popup on Opera (#2865) + if ( s.username ) { + xhr.open( s.type, s.url, s.async, s.username, s.password ); + } else { + xhr.open( s.type, s.url, s.async ); + } + + // Apply custom fields if provided + if ( s.xhrFields ) { + for ( i in s.xhrFields ) { + xhr[ i ] = s.xhrFields[ i ]; + } + } + + // Override mime type if needed + if ( s.mimeType && xhr.overrideMimeType ) { + xhr.overrideMimeType( s.mimeType ); + } + + // X-Requested-With header + // For cross-domain requests, seeing as conditions for a preflight are + // akin to a jigsaw puzzle, we simply never set it to be sure. + // (it can always be set on a per-request basis or even using ajaxSetup) + // For same-domain requests, won't change header if already provided. 
+ if ( !s.crossDomain && !headers["X-Requested-With"] ) { + headers[ "X-Requested-With" ] = "XMLHttpRequest"; + } + + // Need an extra try/catch for cross domain requests in Firefox 3 + try { + for ( i in headers ) { + xhr.setRequestHeader( i, headers[ i ] ); + } + } catch( _ ) {} + + // Do send the request + // This may raise an exception which is actually + // handled in jQuery.ajax (so no try/catch here) + xhr.send( ( s.hasContent && s.data ) || null ); + + // Listener + callback = function( _, isAbort ) { + + var status, + statusText, + responseHeaders, + responses, + xml; + + // Firefox throws exceptions when accessing properties + // of an xhr when a network error occurred + // http://helpful.knobs-dials.com/index.php/Component_returned_failure_code:_0x80040111_(NS_ERROR_NOT_AVAILABLE) + try { + + // Was never called and is aborted or complete + if ( callback && ( isAbort || xhr.readyState === 4 ) ) { + + // Only called once + callback = undefined; + + // Do not keep as active anymore + if ( handle ) { + xhr.onreadystatechange = jQuery.noop; + if ( xhrOnUnloadAbort ) { + delete xhrCallbacks[ handle ]; + } + } + + // If it's an abort + if ( isAbort ) { + // Abort it manually if needed + if ( xhr.readyState !== 4 ) { + xhr.abort(); + } + } else { + status = xhr.status; + responseHeaders = xhr.getAllResponseHeaders(); + responses = {}; + xml = xhr.responseXML; + + // Construct response list + if ( xml && xml.documentElement /* #4958 */ ) { + responses.xml = xml; + } + + // When requesting binary data, IE6-9 will throw an exception + // on any attempt to access responseText (#11426) + try { + responses.text = xhr.responseText; + } catch( e ) { + } + + // Firefox throws an exception when accessing + // statusText for faulty cross-domain requests + try { + statusText = xhr.statusText; + } catch( e ) { + // We normalize with Webkit giving an empty statusText + statusText = ""; + } + + // Filter status for non standard behaviors + + // If the request is local and we have data: assume a success + // (success with no data won't get notified, that's the best we + // can do given current implementations) + if ( !status && s.isLocal && !s.crossDomain ) { + status = responses.text ? 
200 : 404; + // IE - #1450: sometimes returns 1223 when it should be 204 + } else if ( status === 1223 ) { + status = 204; + } + } + } + } catch( firefoxAccessException ) { + if ( !isAbort ) { + complete( -1, firefoxAccessException ); + } + } + + // Call complete if needed + if ( responses ) { + complete( status, statusText, responses, responseHeaders ); + } + }; + + if ( !s.async ) { + // if we're in sync mode we fire the callback + callback(); + } else if ( xhr.readyState === 4 ) { + // (IE6 & IE7) if it's in cache and has been + // retrieved directly we need to fire the callback + setTimeout( callback, 0 ); + } else { + handle = ++xhrId; + if ( xhrOnUnloadAbort ) { + // Create the active xhrs callbacks list if needed + // and attach the unload handler + if ( !xhrCallbacks ) { + xhrCallbacks = {}; + jQuery( window ).unload( xhrOnUnloadAbort ); + } + // Add to list of active xhrs callbacks + xhrCallbacks[ handle ] = callback; + } + xhr.onreadystatechange = callback; + } + }, + + abort: function() { + if ( callback ) { + callback(0,1); + } + } + }; + } + }); +} +var fxNow, timerId, + rfxtypes = /^(?:toggle|show|hide)$/, + rfxnum = new RegExp( "^(?:([-+])=|)(" + core_pnum + ")([a-z%]*)$", "i" ), + rrun = /queueHooks$/, + animationPrefilters = [ defaultPrefilter ], + tweeners = { + "*": [function( prop, value ) { + var end, unit, + tween = this.createTween( prop, value ), + parts = rfxnum.exec( value ), + target = tween.cur(), + start = +target || 0, + scale = 1, + maxIterations = 20; + + if ( parts ) { + end = +parts[2]; + unit = parts[3] || ( jQuery.cssNumber[ prop ] ? "" : "px" ); + + // We need to compute starting value + if ( unit !== "px" && start ) { + // Iteratively approximate from a nonzero starting point + // Prefer the current property, because this process will be trivial if it uses the same units + // Fallback to end or a simple constant + start = jQuery.css( tween.elem, prop, true ) || end || 1; + + do { + // If previous iteration zeroed out, double until we get *something* + // Use a string for doubling factor so we don't accidentally see scale as unchanged below + scale = scale || ".5"; + + // Adjust and apply + start = start / scale; + jQuery.style( tween.elem, prop, start + unit ); + + // Update scale, tolerating zero or NaN from tween.cur() + // And breaking the loop if scale is unchanged or perfect, or if we've just had enough + } while ( scale !== (scale = tween.cur() / target) && scale !== 1 && --maxIterations ); + } + + tween.unit = unit; + tween.start = start; + // If a +=/-= token was provided, we're doing a relative animation + tween.end = parts[1] ? 
start + ( parts[1] + 1 ) * end : end; + } + return tween; + }] + }; + +// Animations created synchronously will run synchronously +function createFxNow() { + setTimeout(function() { + fxNow = undefined; + }, 0 ); + return ( fxNow = jQuery.now() ); +} + +function createTweens( animation, props ) { + jQuery.each( props, function( prop, value ) { + var collection = ( tweeners[ prop ] || [] ).concat( tweeners[ "*" ] ), + index = 0, + length = collection.length; + for ( ; index < length; index++ ) { + if ( collection[ index ].call( animation, prop, value ) ) { + + // we're done with this property + return; + } + } + }); +} + +function Animation( elem, properties, options ) { + var result, + index = 0, + tweenerIndex = 0, + length = animationPrefilters.length, + deferred = jQuery.Deferred().always( function() { + // don't match elem in the :animated selector + delete tick.elem; + }), + tick = function() { + var currentTime = fxNow || createFxNow(), + remaining = Math.max( 0, animation.startTime + animation.duration - currentTime ), + // archaic crash bug won't allow us to use 1 - ( 0.5 || 0 ) (#12497) + temp = remaining / animation.duration || 0, + percent = 1 - temp, + index = 0, + length = animation.tweens.length; + + for ( ; index < length ; index++ ) { + animation.tweens[ index ].run( percent ); + } + + deferred.notifyWith( elem, [ animation, percent, remaining ]); + + if ( percent < 1 && length ) { + return remaining; + } else { + deferred.resolveWith( elem, [ animation ] ); + return false; + } + }, + animation = deferred.promise({ + elem: elem, + props: jQuery.extend( {}, properties ), + opts: jQuery.extend( true, { specialEasing: {} }, options ), + originalProperties: properties, + originalOptions: options, + startTime: fxNow || createFxNow(), + duration: options.duration, + tweens: [], + createTween: function( prop, end, easing ) { + var tween = jQuery.Tween( elem, animation.opts, prop, end, + animation.opts.specialEasing[ prop ] || animation.opts.easing ); + animation.tweens.push( tween ); + return tween; + }, + stop: function( gotoEnd ) { + var index = 0, + // if we are going to the end, we want to run all the tweens + // otherwise we skip this part + length = gotoEnd ? 
animation.tweens.length : 0; + + for ( ; index < length ; index++ ) { + animation.tweens[ index ].run( 1 ); + } + + // resolve when we played the last frame + // otherwise, reject + if ( gotoEnd ) { + deferred.resolveWith( elem, [ animation, gotoEnd ] ); + } else { + deferred.rejectWith( elem, [ animation, gotoEnd ] ); + } + return this; + } + }), + props = animation.props; + + propFilter( props, animation.opts.specialEasing ); + + for ( ; index < length ; index++ ) { + result = animationPrefilters[ index ].call( animation, elem, props, animation.opts ); + if ( result ) { + return result; + } + } + + createTweens( animation, props ); + + if ( jQuery.isFunction( animation.opts.start ) ) { + animation.opts.start.call( elem, animation ); + } + + jQuery.fx.timer( + jQuery.extend( tick, { + anim: animation, + queue: animation.opts.queue, + elem: elem + }) + ); + + // attach callbacks from options + return animation.progress( animation.opts.progress ) + .done( animation.opts.done, animation.opts.complete ) + .fail( animation.opts.fail ) + .always( animation.opts.always ); +} + +function propFilter( props, specialEasing ) { + var index, name, easing, value, hooks; + + // camelCase, specialEasing and expand cssHook pass + for ( index in props ) { + name = jQuery.camelCase( index ); + easing = specialEasing[ name ]; + value = props[ index ]; + if ( jQuery.isArray( value ) ) { + easing = value[ 1 ]; + value = props[ index ] = value[ 0 ]; + } + + if ( index !== name ) { + props[ name ] = value; + delete props[ index ]; + } + + hooks = jQuery.cssHooks[ name ]; + if ( hooks && "expand" in hooks ) { + value = hooks.expand( value ); + delete props[ name ]; + + // not quite $.extend, this wont overwrite keys already present. + // also - reusing 'index' from above because we have the correct "name" + for ( index in value ) { + if ( !( index in props ) ) { + props[ index ] = value[ index ]; + specialEasing[ index ] = easing; + } + } + } else { + specialEasing[ name ] = easing; + } + } +} + +jQuery.Animation = jQuery.extend( Animation, { + + tweener: function( props, callback ) { + if ( jQuery.isFunction( props ) ) { + callback = props; + props = [ "*" ]; + } else { + props = props.split(" "); + } + + var prop, + index = 0, + length = props.length; + + for ( ; index < length ; index++ ) { + prop = props[ index ]; + tweeners[ prop ] = tweeners[ prop ] || []; + tweeners[ prop ].unshift( callback ); + } + }, + + prefilter: function( callback, prepend ) { + if ( prepend ) { + animationPrefilters.unshift( callback ); + } else { + animationPrefilters.push( callback ); + } + } +}); + +function defaultPrefilter( elem, props, opts ) { + var index, prop, value, length, dataShow, toggle, tween, hooks, oldfire, + anim = this, + style = elem.style, + orig = {}, + handled = [], + hidden = elem.nodeType && isHidden( elem ); + + // handle queue: false promises + if ( !opts.queue ) { + hooks = jQuery._queueHooks( elem, "fx" ); + if ( hooks.unqueued == null ) { + hooks.unqueued = 0; + oldfire = hooks.empty.fire; + hooks.empty.fire = function() { + if ( !hooks.unqueued ) { + oldfire(); + } + }; + } + hooks.unqueued++; + + anim.always(function() { + // doing this makes sure that the complete handler will be called + // before this completes + anim.always(function() { + hooks.unqueued--; + if ( !jQuery.queue( elem, "fx" ).length ) { + hooks.empty.fire(); + } + }); + }); + } + + // height/width overflow pass + if ( elem.nodeType === 1 && ( "height" in props || "width" in props ) ) { + // Make sure that nothing sneaks out + // 
Record all 3 overflow attributes because IE does not + // change the overflow attribute when overflowX and + // overflowY are set to the same value + opts.overflow = [ style.overflow, style.overflowX, style.overflowY ]; + + // Set display property to inline-block for height/width + // animations on inline elements that are having width/height animated + if ( jQuery.css( elem, "display" ) === "inline" && + jQuery.css( elem, "float" ) === "none" ) { + + // inline-level elements accept inline-block; + // block-level elements need to be inline with layout + if ( !jQuery.support.inlineBlockNeedsLayout || css_defaultDisplay( elem.nodeName ) === "inline" ) { + style.display = "inline-block"; + + } else { + style.zoom = 1; + } + } + } + + if ( opts.overflow ) { + style.overflow = "hidden"; + if ( !jQuery.support.shrinkWrapBlocks ) { + anim.done(function() { + style.overflow = opts.overflow[ 0 ]; + style.overflowX = opts.overflow[ 1 ]; + style.overflowY = opts.overflow[ 2 ]; + }); + } + } + + + // show/hide pass + for ( index in props ) { + value = props[ index ]; + if ( rfxtypes.exec( value ) ) { + delete props[ index ]; + toggle = toggle || value === "toggle"; + if ( value === ( hidden ? "hide" : "show" ) ) { + continue; + } + handled.push( index ); + } + } + + length = handled.length; + if ( length ) { + dataShow = jQuery._data( elem, "fxshow" ) || jQuery._data( elem, "fxshow", {} ); + if ( "hidden" in dataShow ) { + hidden = dataShow.hidden; + } + + // store state if its toggle - enables .stop().toggle() to "reverse" + if ( toggle ) { + dataShow.hidden = !hidden; + } + if ( hidden ) { + jQuery( elem ).show(); + } else { + anim.done(function() { + jQuery( elem ).hide(); + }); + } + anim.done(function() { + var prop; + jQuery.removeData( elem, "fxshow", true ); + for ( prop in orig ) { + jQuery.style( elem, prop, orig[ prop ] ); + } + }); + for ( index = 0 ; index < length ; index++ ) { + prop = handled[ index ]; + tween = anim.createTween( prop, hidden ? dataShow[ prop ] : 0 ); + orig[ prop ] = dataShow[ prop ] || jQuery.style( elem, prop ); + + if ( !( prop in dataShow ) ) { + dataShow[ prop ] = tween.start; + if ( hidden ) { + tween.end = tween.start; + tween.start = prop === "width" || prop === "height" ? 1 : 0; + } + } + } + } +} + +function Tween( elem, options, prop, end, easing ) { + return new Tween.prototype.init( elem, options, prop, end, easing ); +} +jQuery.Tween = Tween; + +Tween.prototype = { + constructor: Tween, + init: function( elem, options, prop, end, easing, unit ) { + this.elem = elem; + this.prop = prop; + this.easing = easing || "swing"; + this.options = options; + this.start = this.now = this.cur(); + this.end = end; + this.unit = unit || ( jQuery.cssNumber[ prop ] ? "" : "px" ); + }, + cur: function() { + var hooks = Tween.propHooks[ this.prop ]; + + return hooks && hooks.get ? 
+ hooks.get( this ) : + Tween.propHooks._default.get( this ); + }, + run: function( percent ) { + var eased, + hooks = Tween.propHooks[ this.prop ]; + + if ( this.options.duration ) { + this.pos = eased = jQuery.easing[ this.easing ]( + percent, this.options.duration * percent, 0, 1, this.options.duration + ); + } else { + this.pos = eased = percent; + } + this.now = ( this.end - this.start ) * eased + this.start; + + if ( this.options.step ) { + this.options.step.call( this.elem, this.now, this ); + } + + if ( hooks && hooks.set ) { + hooks.set( this ); + } else { + Tween.propHooks._default.set( this ); + } + return this; + } +}; + +Tween.prototype.init.prototype = Tween.prototype; + +Tween.propHooks = { + _default: { + get: function( tween ) { + var result; + + if ( tween.elem[ tween.prop ] != null && + (!tween.elem.style || tween.elem.style[ tween.prop ] == null) ) { + return tween.elem[ tween.prop ]; + } + + // passing any value as a 4th parameter to .css will automatically + // attempt a parseFloat and fallback to a string if the parse fails + // so, simple values such as "10px" are parsed to Float. + // complex values such as "rotate(1rad)" are returned as is. + result = jQuery.css( tween.elem, tween.prop, false, "" ); + // Empty strings, null, undefined and "auto" are converted to 0. + return !result || result === "auto" ? 0 : result; + }, + set: function( tween ) { + // use step hook for back compat - use cssHook if its there - use .style if its + // available and use plain properties where available + if ( jQuery.fx.step[ tween.prop ] ) { + jQuery.fx.step[ tween.prop ]( tween ); + } else if ( tween.elem.style && ( tween.elem.style[ jQuery.cssProps[ tween.prop ] ] != null || jQuery.cssHooks[ tween.prop ] ) ) { + jQuery.style( tween.elem, tween.prop, tween.now + tween.unit ); + } else { + tween.elem[ tween.prop ] = tween.now; + } + } + } +}; + +// Remove in 2.0 - this supports IE8's panic based approach +// to setting things on disconnected nodes + +Tween.propHooks.scrollTop = Tween.propHooks.scrollLeft = { + set: function( tween ) { + if ( tween.elem.nodeType && tween.elem.parentNode ) { + tween.elem[ tween.prop ] = tween.now; + } + } +}; + +jQuery.each([ "toggle", "show", "hide" ], function( i, name ) { + var cssFn = jQuery.fn[ name ]; + jQuery.fn[ name ] = function( speed, easing, callback ) { + return speed == null || typeof speed === "boolean" || + // special check for .toggle( handler, handler, ... ) + ( !i && jQuery.isFunction( speed ) && jQuery.isFunction( easing ) ) ? + cssFn.apply( this, arguments ) : + this.animate( genFx( name, true ), speed, easing, callback ); + }; +}); + +jQuery.fn.extend({ + fadeTo: function( speed, to, easing, callback ) { + + // show any hidden elements after setting opacity to 0 + return this.filter( isHidden ).css( "opacity", 0 ).show() + + // animate to the value specified + .end().animate({ opacity: to }, speed, easing, callback ); + }, + animate: function( prop, speed, easing, callback ) { + var empty = jQuery.isEmptyObject( prop ), + optall = jQuery.speed( speed, easing, callback ), + doAnimation = function() { + // Operate on a copy of prop so per-property easing won't be lost + var anim = Animation( this, jQuery.extend( {}, prop ), optall ); + + // Empty animations resolve immediately + if ( empty ) { + anim.stop( true ); + } + }; + + return empty || optall.queue === false ? 
+ this.each( doAnimation ) : + this.queue( optall.queue, doAnimation ); + }, + stop: function( type, clearQueue, gotoEnd ) { + var stopQueue = function( hooks ) { + var stop = hooks.stop; + delete hooks.stop; + stop( gotoEnd ); + }; + + if ( typeof type !== "string" ) { + gotoEnd = clearQueue; + clearQueue = type; + type = undefined; + } + if ( clearQueue && type !== false ) { + this.queue( type || "fx", [] ); + } + + return this.each(function() { + var dequeue = true, + index = type != null && type + "queueHooks", + timers = jQuery.timers, + data = jQuery._data( this ); + + if ( index ) { + if ( data[ index ] && data[ index ].stop ) { + stopQueue( data[ index ] ); + } + } else { + for ( index in data ) { + if ( data[ index ] && data[ index ].stop && rrun.test( index ) ) { + stopQueue( data[ index ] ); + } + } + } + + for ( index = timers.length; index--; ) { + if ( timers[ index ].elem === this && (type == null || timers[ index ].queue === type) ) { + timers[ index ].anim.stop( gotoEnd ); + dequeue = false; + timers.splice( index, 1 ); + } + } + + // start the next in the queue if the last step wasn't forced + // timers currently will call their complete callbacks, which will dequeue + // but only if they were gotoEnd + if ( dequeue || !gotoEnd ) { + jQuery.dequeue( this, type ); + } + }); + } +}); + +// Generate parameters to create a standard animation +function genFx( type, includeWidth ) { + var which, + attrs = { height: type }, + i = 0; + + // if we include width, step value is 1 to do all cssExpand values, + // if we don't include width, step value is 2 to skip over Left and Right + includeWidth = includeWidth? 1 : 0; + for( ; i < 4 ; i += 2 - includeWidth ) { + which = cssExpand[ i ]; + attrs[ "margin" + which ] = attrs[ "padding" + which ] = type; + } + + if ( includeWidth ) { + attrs.opacity = attrs.width = type; + } + + return attrs; +} + +// Generate shortcuts for custom animations +jQuery.each({ + slideDown: genFx("show"), + slideUp: genFx("hide"), + slideToggle: genFx("toggle"), + fadeIn: { opacity: "show" }, + fadeOut: { opacity: "hide" }, + fadeToggle: { opacity: "toggle" } +}, function( name, props ) { + jQuery.fn[ name ] = function( speed, easing, callback ) { + return this.animate( props, speed, easing, callback ); + }; +}); + +jQuery.speed = function( speed, easing, fn ) { + var opt = speed && typeof speed === "object" ? jQuery.extend( {}, speed ) : { + complete: fn || !fn && easing || + jQuery.isFunction( speed ) && speed, + duration: speed, + easing: fn && easing || easing && !jQuery.isFunction( easing ) && easing + }; + + opt.duration = jQuery.fx.off ? 0 : typeof opt.duration === "number" ? opt.duration : + opt.duration in jQuery.fx.speeds ? 
jQuery.fx.speeds[ opt.duration ] : jQuery.fx.speeds._default; + + // normalize opt.queue - true/undefined/null -> "fx" + if ( opt.queue == null || opt.queue === true ) { + opt.queue = "fx"; + } + + // Queueing + opt.old = opt.complete; + + opt.complete = function() { + if ( jQuery.isFunction( opt.old ) ) { + opt.old.call( this ); + } + + if ( opt.queue ) { + jQuery.dequeue( this, opt.queue ); + } + }; + + return opt; +}; + +jQuery.easing = { + linear: function( p ) { + return p; + }, + swing: function( p ) { + return 0.5 - Math.cos( p*Math.PI ) / 2; + } +}; + +jQuery.timers = []; +jQuery.fx = Tween.prototype.init; +jQuery.fx.tick = function() { + var timer, + timers = jQuery.timers, + i = 0; + + fxNow = jQuery.now(); + + for ( ; i < timers.length; i++ ) { + timer = timers[ i ]; + // Checks the timer has not already been removed + if ( !timer() && timers[ i ] === timer ) { + timers.splice( i--, 1 ); + } + } + + if ( !timers.length ) { + jQuery.fx.stop(); + } + fxNow = undefined; +}; + +jQuery.fx.timer = function( timer ) { + if ( timer() && jQuery.timers.push( timer ) && !timerId ) { + timerId = setInterval( jQuery.fx.tick, jQuery.fx.interval ); + } +}; + +jQuery.fx.interval = 13; + +jQuery.fx.stop = function() { + clearInterval( timerId ); + timerId = null; +}; + +jQuery.fx.speeds = { + slow: 600, + fast: 200, + // Default speed + _default: 400 +}; + +// Back Compat <1.8 extension point +jQuery.fx.step = {}; + +if ( jQuery.expr && jQuery.expr.filters ) { + jQuery.expr.filters.animated = function( elem ) { + return jQuery.grep(jQuery.timers, function( fn ) { + return elem === fn.elem; + }).length; + }; +} +var rroot = /^(?:body|html)$/i; + +jQuery.fn.offset = function( options ) { + if ( arguments.length ) { + return options === undefined ? + this : + this.each(function( i ) { + jQuery.offset.setOffset( this, options, i ); + }); + } + + var docElem, body, win, clientTop, clientLeft, scrollTop, scrollLeft, + box = { top: 0, left: 0 }, + elem = this[ 0 ], + doc = elem && elem.ownerDocument; + + if ( !doc ) { + return; + } + + if ( (body = doc.body) === elem ) { + return jQuery.offset.bodyOffset( elem ); + } + + docElem = doc.documentElement; + + // Make sure it's not a disconnected DOM node + if ( !jQuery.contains( docElem, elem ) ) { + return box; + } + + // If we don't have gBCR, just use 0,0 rather than error + // BlackBerry 5, iOS 3 (original iPhone) + if ( typeof elem.getBoundingClientRect !== "undefined" ) { + box = elem.getBoundingClientRect(); + } + win = getWindow( doc ); + clientTop = docElem.clientTop || body.clientTop || 0; + clientLeft = docElem.clientLeft || body.clientLeft || 0; + scrollTop = win.pageYOffset || docElem.scrollTop; + scrollLeft = win.pageXOffset || docElem.scrollLeft; + return { + top: box.top + scrollTop - clientTop, + left: box.left + scrollLeft - clientLeft + }; +}; + +jQuery.offset = { + + bodyOffset: function( body ) { + var top = body.offsetTop, + left = body.offsetLeft; + + if ( jQuery.support.doesNotIncludeMarginInBodyOffset ) { + top += parseFloat( jQuery.css(body, "marginTop") ) || 0; + left += parseFloat( jQuery.css(body, "marginLeft") ) || 0; + } + + return { top: top, left: left }; + }, + + setOffset: function( elem, options, i ) { + var position = jQuery.css( elem, "position" ); + + // set position first, in-case top/left are set even on static elem + if ( position === "static" ) { + elem.style.position = "relative"; + } + + var curElem = jQuery( elem ), + curOffset = curElem.offset(), + curCSSTop = jQuery.css( elem, "top" ), + curCSSLeft = 
jQuery.css( elem, "left" ), + calculatePosition = ( position === "absolute" || position === "fixed" ) && jQuery.inArray("auto", [curCSSTop, curCSSLeft]) > -1, + props = {}, curPosition = {}, curTop, curLeft; + + // need to be able to calculate position if either top or left is auto and position is either absolute or fixed + if ( calculatePosition ) { + curPosition = curElem.position(); + curTop = curPosition.top; + curLeft = curPosition.left; + } else { + curTop = parseFloat( curCSSTop ) || 0; + curLeft = parseFloat( curCSSLeft ) || 0; + } + + if ( jQuery.isFunction( options ) ) { + options = options.call( elem, i, curOffset ); + } + + if ( options.top != null ) { + props.top = ( options.top - curOffset.top ) + curTop; + } + if ( options.left != null ) { + props.left = ( options.left - curOffset.left ) + curLeft; + } + + if ( "using" in options ) { + options.using.call( elem, props ); + } else { + curElem.css( props ); + } + } +}; + + +jQuery.fn.extend({ + + position: function() { + if ( !this[0] ) { + return; + } + + var elem = this[0], + + // Get *real* offsetParent + offsetParent = this.offsetParent(), + + // Get correct offsets + offset = this.offset(), + parentOffset = rroot.test(offsetParent[0].nodeName) ? { top: 0, left: 0 } : offsetParent.offset(); + + // Subtract element margins + // note: when an element has margin: auto the offsetLeft and marginLeft + // are the same in Safari causing offset.left to incorrectly be 0 + offset.top -= parseFloat( jQuery.css(elem, "marginTop") ) || 0; + offset.left -= parseFloat( jQuery.css(elem, "marginLeft") ) || 0; + + // Add offsetParent borders + parentOffset.top += parseFloat( jQuery.css(offsetParent[0], "borderTopWidth") ) || 0; + parentOffset.left += parseFloat( jQuery.css(offsetParent[0], "borderLeftWidth") ) || 0; + + // Subtract the two offsets + return { + top: offset.top - parentOffset.top, + left: offset.left - parentOffset.left + }; + }, + + offsetParent: function() { + return this.map(function() { + var offsetParent = this.offsetParent || document.body; + while ( offsetParent && (!rroot.test(offsetParent.nodeName) && jQuery.css(offsetParent, "position") === "static") ) { + offsetParent = offsetParent.offsetParent; + } + return offsetParent || document.body; + }); + } +}); + + +// Create scrollLeft and scrollTop methods +jQuery.each( {scrollLeft: "pageXOffset", scrollTop: "pageYOffset"}, function( method, prop ) { + var top = /Y/.test( prop ); + + jQuery.fn[ method ] = function( val ) { + return jQuery.access( this, function( elem, method, val ) { + var win = getWindow( elem ); + + if ( val === undefined ) { + return win ? (prop in win) ? win[ prop ] : + win.document.documentElement[ method ] : + elem[ method ]; + } + + if ( win ) { + win.scrollTo( + !top ? val : jQuery( win ).scrollLeft(), + top ? val : jQuery( win ).scrollTop() + ); + + } else { + elem[ method ] = val; + } + }, method, val, arguments.length, null ); + }; +}); + +function getWindow( elem ) { + return jQuery.isWindow( elem ) ? + elem : + elem.nodeType === 9 ? 
+ elem.defaultView || elem.parentWindow : + false; +} +// Create innerHeight, innerWidth, height, width, outerHeight and outerWidth methods +jQuery.each( { Height: "height", Width: "width" }, function( name, type ) { + jQuery.each( { padding: "inner" + name, content: type, "": "outer" + name }, function( defaultExtra, funcName ) { + // margin is only for outerHeight, outerWidth + jQuery.fn[ funcName ] = function( margin, value ) { + var chainable = arguments.length && ( defaultExtra || typeof margin !== "boolean" ), + extra = defaultExtra || ( margin === true || value === true ? "margin" : "border" ); + + return jQuery.access( this, function( elem, type, value ) { + var doc; + + if ( jQuery.isWindow( elem ) ) { + // As of 5/8/2012 this will yield incorrect results for Mobile Safari, but there + // isn't a whole lot we can do. See pull request at this URL for discussion: + // https://github.com/jquery/jquery/pull/764 + return elem.document.documentElement[ "client" + name ]; + } + + // Get document width or height + if ( elem.nodeType === 9 ) { + doc = elem.documentElement; + + // Either scroll[Width/Height] or offset[Width/Height] or client[Width/Height], whichever is greatest + // unfortunately, this causes bug #3838 in IE6/8 only, but there is currently no good, small way to fix it. + return Math.max( + elem.body[ "scroll" + name ], doc[ "scroll" + name ], + elem.body[ "offset" + name ], doc[ "offset" + name ], + doc[ "client" + name ] + ); + } + + return value === undefined ? + // Get width or height on the element, requesting but not forcing parseFloat + jQuery.css( elem, type, value, extra ) : + + // Set width or height on the element + jQuery.style( elem, type, value, extra ); + }, type, chainable ? margin : undefined, chainable, null ); + }; + }); +}); +// Expose jQuery to the global object +window.jQuery = window.$ = jQuery; + +// Expose jQuery as an AMD module, but only for AMD loaders that +// understand the issues with loading multiple versions of jQuery +// in a page that all might call define(). The loader will indicate +// they have special allowances for multiple jQuery versions by +// specifying define.amd.jQuery = true. Register as a named module, +// since jQuery can be concatenated with other files that may use define, +// but not use a proper concatenation script that understands anonymous +// AMD modules. A named AMD is safest and most robust way to register. +// Lowercase jquery is used because AMD module names are derived from +// file names, and jQuery is normally delivered in a lowercase file name. +// Do this after creating the global so that if an AMD module wants to call +// noConflict to hide this version of jQuery, it will work. 
+if ( typeof define === "function" && define.amd && define.amd.jQuery ) { + define( "jquery", [], function () { return jQuery; } ); +} + +})( window ); diff --git a/qa/workunits/erasure-code/plot.js b/qa/workunits/erasure-code/plot.js new file mode 100644 index 000000000..bd2bba5bb --- /dev/null +++ b/qa/workunits/erasure-code/plot.js @@ -0,0 +1,82 @@ +$(function() { + encode = []; + if (typeof encode_vandermonde_isa != 'undefined') { + encode.push({ + data: encode_vandermonde_isa, + label: "ISA, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_vandermonde_jerasure != 'undefined') { + encode.push({ + data: encode_vandermonde_jerasure, + label: "Jerasure Generic, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_cauchy_isa != 'undefined') { + encode.push({ + data: encode_cauchy_isa, + label: "ISA, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_cauchy_jerasure != 'undefined') { + encode.push({ + data: encode_cauchy_jerasure, + label: "Jerasure, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + $.plot("#encode", encode, { + xaxis: { + mode: "categories", + tickLength: 0 + }, + }); + + decode = []; + if (typeof decode_vandermonde_isa != 'undefined') { + decode.push({ + data: decode_vandermonde_isa, + label: "ISA, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_vandermonde_jerasure != 'undefined') { + decode.push({ + data: decode_vandermonde_jerasure, + label: "Jerasure Generic, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_cauchy_isa != 'undefined') { + decode.push({ + data: decode_cauchy_isa, + label: "ISA, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_cauchy_jerasure != 'undefined') { + decode.push({ + data: decode_cauchy_jerasure, + label: "Jerasure, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + $.plot("#decode", decode, { + xaxis: { + mode: "categories", + tickLength: 0 + }, + }); + +}); diff --git a/qa/workunits/false.sh b/qa/workunits/false.sh new file mode 100644 index 000000000..8a961b329 --- /dev/null +++ b/qa/workunits/false.sh @@ -0,0 +1,3 @@ +#!/bin/sh -ex + +false
\ No newline at end of file diff --git a/qa/workunits/fs/.gitignore b/qa/workunits/fs/.gitignore new file mode 100644 index 000000000..f7f7a0614 --- /dev/null +++ b/qa/workunits/fs/.gitignore @@ -0,0 +1 @@ +test_o_trunc diff --git a/qa/workunits/fs/Makefile b/qa/workunits/fs/Makefile new file mode 100644 index 000000000..c9934254d --- /dev/null +++ b/qa/workunits/fs/Makefile @@ -0,0 +1,11 @@ +CFLAGS = -Wall -Wextra -D_GNU_SOURCE + +TARGETS = test_o_trunc + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git a/qa/workunits/fs/cephfs_mirror_ha_gen.sh b/qa/workunits/fs/cephfs_mirror_ha_gen.sh new file mode 100755 index 000000000..35ee9d4c7 --- /dev/null +++ b/qa/workunits/fs/cephfs_mirror_ha_gen.sh @@ -0,0 +1,69 @@ +#!/bin/bash -ex +# +# cephfs_mirror_ha_gen.sh - generate workload to synchronize +# + +. $(dirname $0)/cephfs_mirror_helpers.sh + +cleanup() +{ + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + for j in `seq 1 $NR_SNAPSHOTS` + do + snap_name=$repo_name/.snap/snap_$j + if test -d $snap_name; then + rmdir $snap_name + fi + done + done + exit 1 +} +trap cleanup EXIT + +configure_peer() +{ + ceph mgr module enable mirroring + ceph fs snapshot mirror enable $PRIMARY_FS + ceph fs snapshot mirror peer_add $PRIMARY_FS client.mirror_remote@ceph $BACKUP_FS + + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + ceph fs snapshot mirror add $PRIMARY_FS "$MIRROR_SUBDIR/$repo_name" + done +} + +create_snaps() +{ + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + for j in `seq 1 $NR_SNAPSHOTS` + do + snap_name=$repo_name/.snap/snap_$j + r=$(( $RANDOM % 100 + 5 )) + arr=($repo_name "reset" "--hard" "HEAD~$r") + exec_git_cmd "${arr[@]}" + mkdir $snap_name + store_checksum $snap_name + done + done +} + +unset CEPH_CLI_TEST_DUP_COMMAND + +echo "running generator on prmary file system..." + +# setup git repos to be used as data set +setup_repos + +# turn on mirroring, add peers... +configure_peer + +# snapshots on primary +create_snaps + +# do not cleanup when exiting on success.. +trap - EXIT diff --git a/qa/workunits/fs/cephfs_mirror_ha_verify.sh b/qa/workunits/fs/cephfs_mirror_ha_verify.sh new file mode 100755 index 000000000..8d8b3859c --- /dev/null +++ b/qa/workunits/fs/cephfs_mirror_ha_verify.sh @@ -0,0 +1,40 @@ +#!/bin/bash -ex +# +# cephfs_mirror_ha_verify.sh - verify synchronized snapshots +# + +. $(dirname $0)/cephfs_mirror_helpers.sh + +echo "running verifier on secondary file system..." + +for i in `seq 1 $NR_DIRECTORIES` +do + repo_name="${REPO_PATH_PFX}_$i" + for j in `seq 1 $NR_SNAPSHOTS` + do + for s in 1 1 2 4 4 4 4 4 8 8 8 8 16 16 32 64 64 128 128 + do + sleep $s + snap_name=$repo_name/.snap/snap_$j + if test -d $repo_name; then + echo "checking snapshot [$snap_name] in $repo_name" + if test -d $snap_name; then + echo "generating hash for $snap_name" + cksum='' + calc_checksum $snap_name cksum + ret=$(compare_checksum $cksum $snap_name) + if [ $ret -ne 0 ]; then + echo "checksum failed $snap_name ($cksum)" + return $ret + else + echo "checksum matched $snap_name ($cksum)" + break + fi + fi + fi + done + echo "couldn't complete verification for: $snap_name" + done +done + +echo "verify done!" 
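A sketch of how the two mirroring scripts above fit together (illustrative only; mount points and script paths below are made up): cephfs_mirror_ha_gen.sh is run against a mount of the primary file system ('dc'), enables mirroring, registers the test directories as mirrored paths, and records an md5 checksum under /tmp for every snapshot it creates; cephfs_mirror_ha_verify.sh is run against the backup file system ('dc-backup') and, with increasing sleeps, waits for each snapshot to be synchronized and compares it against the recorded checksum. This assumes both scripts run on the same client, so the /tmp checksum files written by the generator are visible to the verifier.

    cd /mnt/dc-primary/mirror                            # hypothetical primary mount, mirrored subdirectory
    /path/to/qa/workunits/fs/cephfs_mirror_ha_gen.sh     # create repos, enable mirroring, snapshot, record checksums
    cd /mnt/dc-backup/mirror                             # hypothetical backup mount
    /path/to/qa/workunits/fs/cephfs_mirror_ha_verify.sh  # wait for sync, compare checksums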
diff --git a/qa/workunits/fs/cephfs_mirror_helpers.sh b/qa/workunits/fs/cephfs_mirror_helpers.sh new file mode 100644 index 000000000..69f1c6f3d --- /dev/null +++ b/qa/workunits/fs/cephfs_mirror_helpers.sh @@ -0,0 +1,66 @@ +PRIMARY_FS='dc' +BACKUP_FS='dc-backup' + +REPO=ceph-qa-suite +REPO_DIR=ceph_repo +REPO_PATH_PFX="$REPO_DIR/$REPO" + +NR_DIRECTORIES=4 +NR_SNAPSHOTS=4 +MIRROR_SUBDIR='/mirror' + +calc_checksum() +{ + local path=$1 + local -n ref=$2 + ref=`find -L $path -type f -exec md5sum {} + | awk '{ print $1 }' | md5sum | awk '{ print $1 }'` +} + +store_checksum() +{ + local path=$1 + local cksum='' #something invalid + local fhash=`echo -n $path | md5sum | awk '{ print $1 }'` + calc_checksum $path cksum + echo -n $cksum > "/tmp/primary-$fhash" +} + +compare_checksum() +{ + local ret=0 + local cksum=$1 + local path=$2 + local fhash=`echo -n $path | md5sum | awk '{ print $1 }'` + local cksum_ondisk=`cat /tmp/primary-$fhash` + if [ $cksum != $cksum_ondisk ]; then + echo "$cksum <> $cksum_ondisk" + ret=1 + fi + echo $ret +} + +exec_git_cmd() +{ + local arg=("$@") + local repo_name=${arg[0]} + local cmd=${arg[@]:1} + git --git-dir "$repo_name/.git" $cmd +} + +clone_repo() +{ + local repo_name=$1 + git clone --branch giant "http://github.com/ceph/$REPO" $repo_name +} + +setup_repos() +{ + mkdir "$REPO_DIR" + + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + mkdir $repo_name + clone_repo $repo_name + done +} diff --git a/qa/workunits/fs/damage/test-first-damage.sh b/qa/workunits/fs/damage/test-first-damage.sh new file mode 100755 index 000000000..57447b957 --- /dev/null +++ b/qa/workunits/fs/damage/test-first-damage.sh @@ -0,0 +1,194 @@ +#!/bin/bash + +set -ex + +FIRST_DAMAGE="first-damage.py" +FS=cephfs +METADATA_POOL=cephfs_meta +MOUNT=~/mnt/mnt.0 +PYTHON=python3 + +function usage { + printf '%s: [--fs=<fs_name>] [--metadata-pool=<pool>] [--first-damage=</path/to/first-damage.py>]\n' + exit 1 +} + + +function create { + ceph config set mds mds_bal_fragment_dirs 0 + mkdir dir + DIR_INODE=$(stat -c '%i' dir) + touch dir/a + touch dir/"a space" + touch -- $(printf 'dir/\xff') + mkdir dir/.snap/1 + mkdir dir/.snap/2 + # two snaps + rm dir/a + mkdir dir/.snap/3 + # not present in HEAD + touch dir/a + mkdir dir/.snap/4 + # one snap + rm dir/a + touch dir/a + mkdir dir/.snap/5 + # unlink then create + rm dir/a + touch dir/a + # unlink then create, HEAD not snapped + ls dir/.snap/*/ + mkdir big + BIG_DIR_INODE=$(stat -c '%i' big) + for i in `seq 1 15000`; do + touch $(printf 'big/%08d' $i) + done +} + +function flush { + ceph tell mds."$FS":0 flush journal +} + +function damage { + local IS=$(printf '%llx.%08llx' "$DIR_INODE" 0) + local LS=$(ceph tell mds."$FS":0 dump snaps | jq .last_created) + + local T=$(mktemp -p /tmp) + + # nuke snap 1 version of "a" + rados --pool="$METADATA_POOL" getomapval "$IS" a_$(printf %x $((LS-4))) "$T" + printf '\xff\xff\xff\xf0' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" setomapval "$IS" a_$(printf %x $((LS-4))) --input-file="$T" + + # nuke snap 4 version of "a" + rados --pool="$METADATA_POOL" getomapval "$IS" a_$(printf %x $((LS-1))) "$T" + printf '\xff\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" setomapval "$IS" a_$(printf %x $((LS-1))) --input-file="$T" + + # screw up HEAD + rados --pool="$METADATA_POOL" getomapval "$IS" a_head "$T" + printf '\xfe\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" 
setomapval "$IS" a_head --input-file="$T" + + # screw up HEAD on what dentry in big + IS=$(printf '%llx.%08llx' "$BIG_DIR_INODE" 0) + rados --pool="$METADATA_POOL" getomapval "$IS" 00009999_head "$T" + printf '\xfe\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" setomapval "$IS" 00009999_head --input-file="$T" + + rm -f "$T" +} + +function recover { + flush + ceph fs fail "$FS" + sleep 5 + cephfs-journal-tool --rank="$FS":0 event recover_dentries summary + cephfs-journal-tool --rank="$FS":0 journal reset + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug1 --memo /tmp/memo1 "$METADATA_POOL" + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug2 --memo /tmp/memo2 --repair-nosnap "$METADATA_POOL" + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug3 --memo /tmp/memo3 --remove "$METADATA_POOL" + ceph fs set "$FS" joinable true +} + +function check { + stat dir || exit 1 + stat dir/a || exit 1 + for i in `seq 1 5`; do + stat dir/.snap/$i || exit 2 + done + stat dir/.snap/2/a || exit 3 + stat dir/.snap/5/a || exit 4 + if stat dir/.snap/1/a; then + echo should be gone + exit 5 + fi + if stat dir/.snap/3/a; then + echo should not ever exist + exit 6 + fi + if stat dir/.snap/4/a; then + echo should be gone + exit 7 + fi +} + +function cleanup { + rmdir dir/.snap/* + find dir + rm -rf dir +} + +function mount { + sudo --preserve-env=CEPH_CONF bin/mount.ceph :/ "$MOUNT" -o name=admin,noshare + df -h "$MOUNT" +} + +function main { + eval set -- $(getopt --name "$0" --options '' --longoptions 'help,fs:,metadata-pool:,first-damage:,mount:,python:' -- "$@") + + while [ "$#" -gt 0 ]; do + echo "$*" + echo "$1" + case "$1" in + -h|--help) + usage + ;; + --fs) + FS="$2" + shift 2 + ;; + --metadata-pool) + METADATA_POOL="$2" + shift 2 + ;; + --mount) + MOUNT="$2" + shift 2 + ;; + --first-damage) + FIRST_DAMAGE="$2" + shift 2 + ;; + --python) + PYTHON="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + usage + ;; + esac + done + + mount + + pushd "$MOUNT" + create + popd + + sudo umount -f "$MOUNT" + + # flush dentries/inodes to omap + flush + + damage + + recover + + sleep 5 # for mds to join + + mount + + pushd "$MOUNT" + check + cleanup + popd + + sudo umount -f "$MOUNT" +} + +main "$@" diff --git a/qa/workunits/fs/fscrypt.sh b/qa/workunits/fs/fscrypt.sh new file mode 100755 index 000000000..ca856a62e --- /dev/null +++ b/qa/workunits/fs/fscrypt.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash + +set -xe + +mydir=`dirname $0` + +if [ $# -ne 2 ] +then + echo "2 parameters are required!\n" + echo "Usage:" + echo " fscrypt.sh <type> <testdir>" + echo " type: should be any of 'none', 'unlocked' or 'locked'" + echo " testdir: the test direcotry name" + exit 1 +fi + +fscrypt=$1 +testcase=$2 +testdir=fscrypt_test_${fscrypt}_${testcase} +mkdir $testdir + +XFSPROGS_DIR='xfprogs-dev-dir' +XFSTESTS_DIR='xfstest-dev-dir' +export XFS_IO_PROG="$(type -P xfs_io)" + +# Setup the xfstests env +setup_xfstests_env() +{ + git clone https://git.ceph.com/xfstests-dev.git $XFSTESTS_DIR --depth 1 + pushd $XFSTESTS_DIR + . 
common/encrypt + popd +} + +install_deps() +{ + local system_value=$(sudo lsb_release -is | awk '{print tolower($0)}') + case $system_value in + "centos" | "centosstream" | "fedora") + sudo yum install -y inih-devel userspace-rcu-devel \ + libblkid-devel gettext libedit-devel \ + libattr-devel device-mapper-devel libicu-devel + ;; + "ubuntu" | "debian") + sudo apt-get install -y libinih-dev liburcu-dev \ + libblkid-dev gettext libedit-dev libattr1-dev \ + libdevmapper-dev libicu-dev pkg-config + ;; + *) + echo "Unsupported distro $system_value" + exit 1 + ;; + esac +} + +# Install xfsprogs-dev from source to support "add_enckey" for xfs_io +install_xfsprogs() +{ + local install_xfsprogs=0 + + xfs_io -c "help add_enckey" | grep -q 'not found' && install_xfsprogs=1 + + if [ $install_xfsprogs -eq 1 ]; then + install_deps + + git clone https://git.ceph.com/xfsprogs-dev.git $XFSPROGS_DIR --depth 1 + pushd $XFSPROGS_DIR + make + sudo make install + popd + fi +} + +clean_up() +{ + rm -rf $XFSPROGS_DIR + rm -rf $XFSTESTS_DIR + rm -rf $testdir +} + +# For now will test the V2 encryption policy only as the +# V1 encryption policy is deprecated + +install_xfsprogs +setup_xfstests_env + +# Generate a fixed keying identifier +raw_key=$(_generate_raw_encryption_key) +keyid=$(_add_enckey $testdir "$raw_key" | awk '{print $NF}') + +case ${fscrypt} in + "none") + # do nothing for the test directory and will test it + # as one non-encrypted directory. + pushd $testdir + ${mydir}/../suites/${testcase}.sh + popd + clean_up + ;; + "unlocked") + # set encrypt policy with the key provided and then + # the test directory will be encrypted & unlocked + _set_encpolicy $testdir $keyid + pushd $testdir + ${mydir}/../suites/${testcase}.sh + popd + clean_up + ;; + "locked") + # remove the key, then the test directory will be locked + # and any modification will be denied by requiring the key + _rm_enckey $testdir $keyid + clean_up + ;; + *) + clean_up + echo "Unknown parameter $1" + exit 1 +esac diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh new file mode 100755 index 000000000..a11131215 --- /dev/null +++ b/qa/workunits/fs/full/subvolume_clone.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the 'ceph fs subvolume snapshot clone' when the osd is full. +# The clone fails with 'MetadataMgrException: -28 (error in write)' and +# truncates the config file of corresponding subvolume while updating the config file. +# Hence the subsequent subvolume commands on the clone fails with +# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback. + +# The osd is of the size 1GB. The full-ratios are set so that osd is treated full +# at around 600MB. The subvolume is created and 100MB is written. +# The subvolume is snapshotted and cloned ten times. Since the clone delay is set to 15 seconds, +# all the clones reach pending state for sure. Among ten clones, only few succeed and rest fails +# with ENOSPACE. + +# At this stage, ".meta" config file of the failed clones are checked if it's truncated. +# and clone status command is checked for traceback. + +# Note that the failed clones would be in retry loop and it's state would be 'pending' or 'in-progress'. +# It's state is not updated to 'failed' as the config update to gets ENOSPACE too. 
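# Illustrative arithmetic (not part of this workunit) behind the "around 600MB"
# figure above: with a 1GB OSD and a full-ratio of 0.6, roughly
#     1024MB * 0.6 ~= 614MB
# can be written before OSD_FULL is raised, while the source subvolume plus ten
# 100MB clones would need on the order of 1100MB. Only a few clones can therefore
# complete; the rest are guaranteed to hit ENOSPC, which is the condition this
# test exercises.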
+ +set -e +ignore_failure() { + if "$@"; then return 0; else return 0; fi +} + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +NUM_CLONES=10 + +ceph fs subvolume create cephfs sub_0 +subvol_path_0=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null) + +# For debugging +echo "Before ratios are set" +df $CEPH_MNT +ceph osd df + +ceph osd set-full-ratio 0.6 +ceph osd set-nearfull-ratio 0.50 +ceph osd set-backfillfull-ratio 0.55 + +# For debugging +echo "After ratios are set" +df -h +ceph osd df + +for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync;done + +# For debugging +echo "After subvolumes are written" +df -h $CEPH_MNT +ceph osd df + +# snapshot +ceph fs subvolume snapshot create cephfs sub_0 snap_0 + +# Set clone snapshot delay +ceph config set mgr mgr/volumes/snapshot_clone_delay 15 + +# Schedule few clones, some would fail with no space +for i in $(eval echo {1..$NUM_CLONES});do ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i;done + +# Wait for osd is full +timeout=90 +while [ $timeout -gt 0 ] +do + health=$(ceph health detail) + [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break + echo "Wating for osd to be full: $timeout" + sleep 1 + let "timeout-=1" +done + +# For debugging +echo "After osd is full" +df -h $CEPH_MNT +ceph osd df + +# Check clone status, this should not crash +for i in $(eval echo {1..$NUM_CLONES}) +do + ignore_failure ceph fs clone status cephfs clone_$i >/tmp/out_${PID}_file 2>/tmp/error_${PID}_file + cat /tmp/error_${PID}_file + if grep "complete" /tmp/out_${PID}_file; then + echo "The clone_$i is completed" + else + #in-progress/pending clones, No traceback should be found in stderr + echo clone_$i in PENDING/IN-PROGRESS + expect_failure sudo grep "Traceback" /tmp/error_${PID}_file + #config file should not be truncated and GLOBAL section should be found + sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/clone_$i/.meta + fi +done + +# Hard cleanup +ignore_failure sudo rm -rf $CEPH_MNT/_index/clone/* +ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/clone_* +ignore_failure sudo rmdir $CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0 +ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0 + +#Set the ratios back for other full tests to run +ceph osd set-full-ratio 0.95 +ceph osd set-nearfull-ratio 0.95 +ceph osd set-backfillfull-ratio 0.95 + +#After test +echo "After test" +df -h $CEPH_MNT +ceph osd df + +echo OK diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh new file mode 100755 index 000000000..a464e30f5 --- /dev/null +++ b/qa/workunits/fs/full/subvolume_rm.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command +# when the osd is full. The command used to hang. The osd is of the size 1GB. +# The subvolume is created and 500MB file is written. The full-ratios are +# set below 500MB such that the osd is treated as full. Now the subvolume is +# is removed. This should be successful with the introduction of FULL +# capabilities which the mgr holds. 
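# Sketch only (not part of this workunit): the "wait until the cluster reports
# OSD_FULL" loop used below is repeated verbatim across the full/ workunits and
# could be factored into a shared helper along these lines (names are made up):
wait_for_health() {
    # usage: wait_for_health OSD_FULL 30
    local flag=$1 timeout=${2:-30}
    while [ "$timeout" -gt 0 ]; do
        ceph health detail | grep -q "$flag" && return 0
        sleep 1
        timeout=$((timeout - 1))
    done
    return 1
}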
+ +set -e +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +ceph fs subvolume create cephfs sub_0 +subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null) + +#For debugging +echo "Before write" +df -h +ceph osd df + +sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500 + +ceph osd set-full-ratio 0.2 +ceph osd set-nearfull-ratio 0.16 +ceph osd set-backfillfull-ratio 0.18 + +timeout=30 +while [ $timeout -gt 0 ] +do + health=$(ceph health detail) + [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break + echo "Wating for osd to be full: $timeout" + sleep 1 + let "timeout-=1" +done + +#For debugging +echo "After ratio set" +df -h +ceph osd df + +#Delete subvolume +ceph fs subvolume rm cephfs sub_0 + +#Validate subvolume is deleted +expect_failure ceph fs subvolume info cephfs sub_0 + +#Wait for subvolume to delete data +trashdir=$CEPH_MNT/volumes/_deleting +timeout=30 +while [ $timeout -gt 0 ] +do + [ -z "$(sudo ls -A $trashdir)" ] && echo "Trash directory $trashdir is empty" && break + echo "Wating for trash dir to be empty: $timeout" + sleep 1 + let "timeout-=1" +done + +#Set the ratios back for other full tests to run +ceph osd set-full-ratio 0.95 +ceph osd set-nearfull-ratio 0.95 +ceph osd set-backfillfull-ratio 0.95 + +#After test +echo "After test" +df -h +ceph osd df + +echo OK diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh new file mode 100755 index 000000000..f6d0add9f --- /dev/null +++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the 'ceph fs subvolume snapshot rm' when the osd is full. +# The snapshot rm fails with 'MetadataMgrException: -28 (error in write)' and +# truncates the config file of corresponding subvolume. Hence the subsequent +# snapshot rm of the same snapshot fails with 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' +# traceback. + +# The osd is of the size 1GB. The subvolume is created and 800MB file is written. +# Then full-ratios are set below 500MB such that the osd is treated as full. +# The subvolume snapshot is taken which succeeds as no extra space is required +# for snapshot. Now, the removal of the snapshot fails with ENOSPACE as it +# fails to remove the snapshot metadata set. The snapshot removal fails +# but should not traceback and truncate the config file. 
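# For context on the checks below (indicative only, not part of this workunit):
# each subvolume keeps a small INI-style ".meta" config; loosely it looks like
#     [GLOBAL]
#     version = 2
#     type = subvolume
#     path = /volumes/_nogroup/sub_0/<uuid>
#     state = complete
# A truncating write under ENOSPC would lose the [GLOBAL] section entirely, which
# is why the test greps for it after the snapshot removal fails.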
+ +set -e +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +ignore_failure() { + if "$@"; then return 0; else return 0; fi +} + +ceph fs subvolume create cephfs sub_0 +subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null) + +#For debugging +echo "Before write" +df $CEPH_MNT +ceph osd df + +# Write 800MB file and set full ratio to around 200MB +ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync + +ceph osd set-full-ratio 0.2 +ceph osd set-nearfull-ratio 0.16 +ceph osd set-backfillfull-ratio 0.18 + +timeout=30 +while [ $timeout -gt 0 ] +do + health=$(ceph health detail) + [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break + echo "Wating for osd to be full: $timeout" + sleep 1 + let "timeout-=1" +done + +#Take snapshot +ceph fs subvolume snapshot create cephfs sub_0 snap_0 + +#Remove snapshot fails but should not throw traceback +expect_failure ceph fs subvolume snapshot rm cephfs sub_0 snap_0 2>/tmp/error_${PID}_file +cat /tmp/error_${PID}_file + +# No traceback should be found +expect_failure grep "Traceback" /tmp/error_${PID}_file + +# Validate config file is not truncated and GLOBAL section exists +sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/sub_0/.meta + +#For debugging +echo "After write" +df $CEPH_MNT +ceph osd df + +# Snapshot removal with force option should succeed +ceph fs subvolume snapshot rm cephfs sub_0 snap_0 --force + +#Cleanup from backend +ignore_failure sudo rm -f /tmp/error_${PID}_file +ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0 + +#Set the ratios back for other full tests to run +ceph osd set-full-ratio 0.95 +ceph osd set-nearfull-ratio 0.95 +ceph osd set-backfillfull-ratio 0.95 + +#After test +echo "After test" +df -h $CEPH_MNT +ceph osd df + +echo OK diff --git a/qa/workunits/fs/maxentries/maxentries.sh b/qa/workunits/fs/maxentries/maxentries.sh new file mode 100755 index 000000000..d48fd956e --- /dev/null +++ b/qa/workunits/fs/maxentries/maxentries.sh @@ -0,0 +1,155 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function make_files() +{ + set +x + temp_dir=`mktemp -d mkfile_test_XXXXXX` + for i in $(seq 1 $1) + do + echo -n | dd of="${temp_dir}/file_$i" conv=fsync || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_dirs() +{ + set +x + temp_dir=`mktemp -d mkdir_test_XXXXXX` + for i in $(seq 1 $1) + do + mkdir -p ${temp_dir}/dir_${i} || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_nodes() +{ + set +x + temp_dir=`mktemp -d mknod_test_XXXXXX` + for i in $(seq 1 $1) + do + mknod ${temp_dir}/fifo_${i} p || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function rename_files() +{ + set +x + temp_dir=`mktemp -d rename_test_XXXXXX` + mkdir -p ${temp_dir}/rename + + for i in $(seq 1 $1) + do + touch ${temp_dir}/file_${i} || return 1 + + mv ${temp_dir}/file_${i} ${temp_dir}/rename/ || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_symlinks() +{ + set +x + temp_dir=`mktemp -d symlink_test_XXXXXX` + mkdir -p ${temp_dir}/symlink + + touch ${temp_dir}/file + + for i in $(seq 1 $1) + do + ln -s ../file ${temp_dir}/symlink/sym_${i} || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_links() +{ + set +x + temp_dir=`mktemp -d link_test_XXXXXX` + mkdir -p ${temp_dir}/link + + touch 
${temp_dir}/file + + for i in $(seq 1 $1) + do + ln ${temp_dir}/file ${temp_dir}/link/link_${i} || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function cleanup() +{ + rm -rf * +} + +test_dir="max_entries" +mkdir -p $test_dir +pushd $test_dir + +dir_max_entries=100 +ceph config set mds mds_dir_max_entries $dir_max_entries + +ok_dir_max_entries=$dir_max_entries +fail_dir_max_entries=$((dir_max_entries+1)) + +# make files test +make_files $ok_dir_max_entries +expect_false make_files $fail_dir_max_entries + +# make dirs test +make_dirs $ok_dir_max_entries +expect_false make_dirs $fail_dir_max_entries + +# make nodes test +make_nodes $ok_dir_max_entries +expect_false make_nodes $fail_dir_max_entries + +# rename files test +rename_files $ok_dir_max_entries +expect_false rename_files $fail_dir_max_entries + +# symlink files test +make_symlinks $ok_dir_max_entries +expect_false make_symlinks $fail_dir_max_entries + +# link files test +make_links $ok_dir_max_entries +expect_false make_links $fail_dir_max_entries + +# no limit (e.g., default value) +dir_max_entries=0 +ceph config set mds mds_dir_max_entries $dir_max_entries + +make_files 500 +make_dirs 500 +make_nodes 500 +rename_files 500 +make_symlinks 500 +make_links 500 + +cleanup + +popd # $test_dir + +echo OK diff --git a/qa/workunits/fs/misc/acl.sh b/qa/workunits/fs/misc/acl.sh new file mode 100755 index 000000000..198b05671 --- /dev/null +++ b/qa/workunits/fs/misc/acl.sh @@ -0,0 +1,50 @@ +#!/bin/sh -x + +set -e +mkdir -p testdir +cd testdir + +set +e +setfacl -d -m u:nobody:rw . +if test $? != 0; then + echo "Filesystem does not support ACL" + exit 0 +fi + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +set -e +c=0 +while [ $c -lt 100 ] +do + c=`expr $c + 1` + # inherited ACL from parent directory's default ACL + mkdir d1 + c1=`getfacl d1 | grep -c "nobody:rw"` + echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null + c2=`getfacl d1 | grep -c "nobody:rw"` + rmdir d1 + if [ $c1 -ne 2 ] || [ $c2 -ne 2 ] + then + echo "ERROR: incorrect ACLs" + exit 1 + fi +done + +mkdir d1 + +# The ACL xattr only contains ACL header. ACL should be removed +# in this case. +setfattr -n system.posix_acl_access -v 0x02000000 d1 +setfattr -n system.posix_acl_default -v 0x02000000 . + +expect_failure getfattr -n system.posix_acl_access d1 +expect_failure getfattr -n system.posix_acl_default . + + +rmdir d1 +cd .. +rmdir testdir +echo OK diff --git a/qa/workunits/fs/misc/chmod.sh b/qa/workunits/fs/misc/chmod.sh new file mode 100755 index 000000000..de66776f1 --- /dev/null +++ b/qa/workunits/fs/misc/chmod.sh @@ -0,0 +1,60 @@ +#!/bin/sh -x + +set -e + +check_perms() { + + file=$1 + r=$(ls -la ${file}) + if test $? != 0; then + echo "ERROR: File listing/stat failed" + exit 1 + fi + + perms=$2 + if test "${perms}" != $(echo ${r} | awk '{print $1}') && \ + test "${perms}." != $(echo ${r} | awk '{print $1}') && \ + test "${perms}+" != $(echo ${r} | awk '{print $1}'); then + echo "ERROR: Permissions should be ${perms}" + exit 1 + fi +} + +file=test_chmod.$$ + +echo "foo" > ${file} +if test $? != 0; then + echo "ERROR: Failed to create file ${file}" + exit 1 +fi + +chmod 400 ${file} +if test $? != 0; then + echo "ERROR: Failed to change mode of ${file}" + exit 1 +fi + +check_perms ${file} "-r--------" + +set +e +echo "bar" >> ${file} +if test $? = 0; then + echo "ERROR: Write to read-only file should Fail" + exit 1 +fi + +set -e +chmod 600 ${file} +echo "bar" >> ${file} +if test $? 
!= 0; then + echo "ERROR: Write to writeable file failed" + exit 1 +fi + +check_perms ${file} "-rw-------" + +echo "foo" >> ${file} +if test $? != 0; then + echo "ERROR: Failed to write to file" + exit 1 +fi diff --git a/qa/workunits/fs/misc/dac_override.sh b/qa/workunits/fs/misc/dac_override.sh new file mode 100755 index 000000000..dfb1a9091 --- /dev/null +++ b/qa/workunits/fs/misc/dac_override.sh @@ -0,0 +1,19 @@ +#!/bin/sh -x + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +set -e + +mkdir -p testdir +file=test_chmod.$$ + +echo "foo" > testdir/${file} +sudo chmod 600 testdir + +# only root can read +expect_failure cat testdir/${file} + +# directory read/write DAC override for root should allow read +sudo cat testdir/${file} diff --git a/qa/workunits/fs/misc/direct_io.py b/qa/workunits/fs/misc/direct_io.py new file mode 100755 index 000000000..f7d59d95a --- /dev/null +++ b/qa/workunits/fs/misc/direct_io.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 + +import mmap +import os +import subprocess + +def main(): + path = "testfile" + fd = os.open(path, os.O_RDWR | os.O_CREAT | os.O_TRUNC | os.O_DIRECT, 0o644) + + ino = os.fstat(fd).st_ino + obj_name = "{ino:x}.00000000".format(ino=ino) + pool_name = os.getxattr(path, "ceph.file.layout.pool") + + buf = mmap.mmap(-1, 1) + buf.write(b'1') + os.write(fd, buf) + + proc = subprocess.Popen(['rados', '-p', pool_name, 'get', obj_name, 'tmpfile']) + proc.wait() + + with open('tmpfile', 'rb') as tmpf: + out = tmpf.read(1) + if out != b'1': + raise RuntimeError("data were not written to object store directly") + + with open('tmpfile', 'wb') as tmpf: + tmpf.write(b'2') + + proc = subprocess.Popen(['rados', '-p', pool_name, 'put', obj_name, 'tmpfile']) + proc.wait() + + os.lseek(fd, 0, os.SEEK_SET) + out = os.read(fd, 1) + if out != b'2': + raise RuntimeError("data were not directly read from object store") + + os.close(fd) + print('ok') + + +main() diff --git a/qa/workunits/fs/misc/dirfrag.sh b/qa/workunits/fs/misc/dirfrag.sh new file mode 100755 index 000000000..eea0ec3bc --- /dev/null +++ b/qa/workunits/fs/misc/dirfrag.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +set -e + +DEPTH=5 +COUNT=10000 + +kill_jobs() { + jobs -p | xargs kill +} +trap kill_jobs INT + +create_files() { + for i in `seq 1 $COUNT` + do + touch file$i + done +} + +delete_files() { + for i in `ls -f` + do + if [[ ${i}a = file*a ]] + then + rm -f $i + fi + done +} + +rm -rf testdir +mkdir testdir +cd testdir + +echo "creating folder hierarchy" +for i in `seq 1 $DEPTH`; do + mkdir dir$i + cd dir$i + create_files & +done +wait + +echo "created hierarchy, now cleaning up" + +for i in `seq 1 $DEPTH`; do + delete_files & + cd .. +done +wait + +echo "cleaned up hierarchy" +cd .. 
+rm -rf testdir diff --git a/qa/workunits/fs/misc/filelock_deadlock.py b/qa/workunits/fs/misc/filelock_deadlock.py new file mode 100755 index 000000000..398902f6c --- /dev/null +++ b/qa/workunits/fs/misc/filelock_deadlock.py @@ -0,0 +1,72 @@ +#!/usr/bin/python3 + +import errno +import fcntl +import os +import signal +import struct +import time + + +def handler(signum, frame): + pass + + +def lock_two(f1, f2): + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 10, 0, 0) + fcntl.fcntl(f1, fcntl.F_SETLKW, lockdata) + time.sleep(10) + + # don't wait forever + signal.signal(signal.SIGALRM, handler) + signal.alarm(10) + exitcode = 0 + try: + fcntl.fcntl(f2, fcntl.F_SETLKW, lockdata) + except IOError as e: + if e.errno == errno.EDEADLK: + exitcode = 1 + elif e.errno == errno.EINTR: + exitcode = 2 + else: + exitcode = 3 + os._exit(exitcode) + + +def main(): + pid1 = os.fork() + if pid1 == 0: + f1 = open("testfile1", 'w') + f2 = open("testfile2", 'w') + lock_two(f1, f2) + + pid2 = os.fork() + if pid2 == 0: + f1 = open("testfile2", 'w') + f2 = open("testfile3", 'w') + lock_two(f1, f2) + + pid3 = os.fork() + if pid3 == 0: + f1 = open("testfile3", 'w') + f2 = open("testfile1", 'w') + lock_two(f1, f2) + + deadlk_count = 0 + i = 0 + while i < 3: + pid, status = os.wait() + exitcode = status >> 8 + if exitcode == 1: + deadlk_count += 1 + elif exitcode != 0: + raise RuntimeError("unexpect exit code of child") + i += 1 + + if deadlk_count != 1: + raise RuntimeError("unexpect count of EDEADLK") + + print('ok') + + +main() diff --git a/qa/workunits/fs/misc/filelock_interrupt.py b/qa/workunits/fs/misc/filelock_interrupt.py new file mode 100755 index 000000000..b261d74fb --- /dev/null +++ b/qa/workunits/fs/misc/filelock_interrupt.py @@ -0,0 +1,94 @@ +#!/usr/bin/python3 + +from contextlib import contextmanager +import errno +import fcntl +import signal +import struct + +@contextmanager +def timeout(seconds): + def timeout_handler(signum, frame): + raise InterruptedError + + orig_handler = signal.signal(signal.SIGALRM, timeout_handler) + try: + signal.alarm(seconds) + yield + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, orig_handler) + + +""" +introduced by Linux 3.15 +""" +setattr(fcntl, "F_OFD_GETLK", 36) +setattr(fcntl, "F_OFD_SETLK", 37) +setattr(fcntl, "F_OFD_SETLKW", 38) + + +def main(): + f1 = open("testfile", 'w') + f2 = open("testfile", 'w') + + fcntl.flock(f1, fcntl.LOCK_SH | fcntl.LOCK_NB) + + """ + is flock interruptible? + """ + with timeout(5): + try: + fcntl.flock(f2, fcntl.LOCK_EX) + except InterruptedError: + pass + else: + raise RuntimeError("expect flock to block") + + fcntl.flock(f1, fcntl.LOCK_UN) + + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 10, 0, 0) + try: + fcntl.fcntl(f1, fcntl.F_OFD_SETLK, lockdata) + except IOError as e: + if e.errno != errno.EINVAL: + raise + else: + print('kernel does not support fcntl.F_OFD_SETLK') + return + + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 10, 10, 0, 0) + fcntl.fcntl(f2, fcntl.F_OFD_SETLK, lockdata) + + """ + is posix lock interruptible? 
+ """ + with timeout(5): + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_OFD_SETLKW, lockdata) + except InterruptedError: + pass + else: + raise RuntimeError("expect posix lock to block") + + """ + file handler 2 should still hold lock on 10~10 + """ + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 10, 10, 0, 0) + fcntl.fcntl(f1, fcntl.F_OFD_SETLK, lockdata) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("expect file handler 2 to hold lock on 10~10") + + lockdata = struct.pack('hhllhh', fcntl.F_UNLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f1, fcntl.F_OFD_SETLK, lockdata) + fcntl.fcntl(f2, fcntl.F_OFD_SETLK, lockdata) + + print('ok') + + +main() diff --git a/qa/workunits/fs/misc/i_complete_vs_rename.sh b/qa/workunits/fs/misc/i_complete_vs_rename.sh new file mode 100755 index 000000000..a9b98271d --- /dev/null +++ b/qa/workunits/fs/misc/i_complete_vs_rename.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +set -e + +mkdir x +cd x +touch a +touch b +touch c +touch d +ls +chmod 777 . +stat e || true +touch f +touch g + +# over existing file +echo attempting rename over existing file... +touch ../xx +mv ../xx f +ls | grep f || false +echo rename over existing file is okay + +# over negative dentry +echo attempting rename over negative dentry... +touch ../xx +mv ../xx e +ls | grep e || false +echo rename over negative dentry is ok + +echo OK diff --git a/qa/workunits/fs/misc/layout_vxattrs.sh b/qa/workunits/fs/misc/layout_vxattrs.sh new file mode 100755 index 000000000..811336273 --- /dev/null +++ b/qa/workunits/fs/misc/layout_vxattrs.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash + +set -ex + +# detect data pool +datapool= +dir=. +while true ; do + echo $dir + datapool=$(getfattr -n ceph.dir.layout.pool $dir --only-values) && break + dir=$dir/.. 
+done + +# file +rm -f file file2 +touch file file2 + +getfattr -n ceph.file.layout file +getfattr -n ceph.file.layout file | grep -q object_size= +getfattr -n ceph.file.layout file | grep -q stripe_count= +getfattr -n ceph.file.layout file | grep -q stripe_unit= +getfattr -n ceph.file.layout file | grep -q pool= +getfattr -n ceph.file.layout.pool file +getfattr -n ceph.file.layout.pool_namespace file +getfattr -n ceph.file.layout.stripe_unit file +getfattr -n ceph.file.layout.stripe_count file +getfattr -n ceph.file.layout.object_size file + +getfattr -n ceph.file.layout.bogus file 2>&1 | grep -q 'No such attribute' +getfattr -n ceph.dir.layout file 2>&1 | grep -q 'No such attribute' + +setfattr -n ceph.file.layout.stripe_unit -v 1048576 file2 +setfattr -n ceph.file.layout.stripe_count -v 8 file2 +setfattr -n ceph.file.layout.object_size -v 10485760 file2 + +setfattr -n ceph.file.layout.pool -v $datapool file2 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +setfattr -n ceph.file.layout.pool_namespace -v foons file2 +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q foons +setfattr -x ceph.file.layout.pool_namespace file2 +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q -v foons + +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 1048576 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 8 +getfattr -n ceph.file.layout.object_size file2 | grep -q 10485760 + +setfattr -n ceph.file.layout -v "stripe_unit=4194304 stripe_count=16 object_size=41943040 pool=$datapool pool_namespace=foons" file2 +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 4194304 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 16 +getfattr -n ceph.file.layout.object_size file2 | grep -q 41943040 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q foons + +setfattr -n ceph.file.layout -v "stripe_unit=1048576" file2 +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 1048576 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 16 +getfattr -n ceph.file.layout.object_size file2 | grep -q 41943040 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q foons + +setfattr -n ceph.file.layout -v "stripe_unit=2097152 stripe_count=4 object_size=2097152 pool=$datapool pool_namespace=barns" file2 +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 2097152 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 4 +getfattr -n ceph.file.layout.object_size file2 | grep -q 2097152 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q barns + +# dir +rm -f dir/file || true +rmdir dir || true +mkdir -p dir + +getfattr -d -m - dir | grep -q ceph.dir.layout && exit 1 || true +getfattr -d -m - dir | grep -q ceph.file.layout && exit 1 || true +getfattr -n ceph.dir.layout dir && exit 1 || true + +setfattr -n ceph.dir.layout.stripe_unit -v 1048576 dir +setfattr -n ceph.dir.layout.stripe_count -v 8 dir +setfattr -n ceph.dir.layout.object_size -v 10485760 dir +setfattr -n ceph.dir.layout.pool -v $datapool dir +setfattr -n ceph.dir.layout.pool_namespace -v dirns dir + +getfattr -n ceph.dir.layout dir +getfattr -n ceph.dir.layout dir | grep -q object_size=10485760 +getfattr -n ceph.dir.layout dir | grep -q stripe_count=8 +getfattr -n ceph.dir.layout dir | grep -q stripe_unit=1048576 +getfattr -n ceph.dir.layout dir | grep -q pool=$datapool +getfattr -n 
ceph.dir.layout dir | grep -q pool_namespace=dirns +getfattr -n ceph.dir.layout.pool dir | grep -q $datapool +getfattr -n ceph.dir.layout.stripe_unit dir | grep -q 1048576 +getfattr -n ceph.dir.layout.stripe_count dir | grep -q 8 +getfattr -n ceph.dir.layout.object_size dir | grep -q 10485760 +getfattr -n ceph.dir.layout.pool_namespace dir | grep -q dirns + + +setfattr -n ceph.file.layout -v "stripe_count=16" file2 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 16 +setfattr -n ceph.file.layout -v "object_size=10485760 stripe_count=8 stripe_unit=1048576 pool=$datapool pool_namespace=dirns" file2 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 8 + +touch dir/file +getfattr -n ceph.file.layout.pool dir/file | grep -q $datapool +getfattr -n ceph.file.layout.stripe_unit dir/file | grep -q 1048576 +getfattr -n ceph.file.layout.stripe_count dir/file | grep -q 8 +getfattr -n ceph.file.layout.object_size dir/file | grep -q 10485760 +getfattr -n ceph.file.layout.pool_namespace dir/file | grep -q dirns + +setfattr -x ceph.dir.layout.pool_namespace dir +getfattr -n ceph.dir.layout dir | grep -q -v pool_namespace=dirns + +setfattr -x ceph.dir.layout dir +getfattr -n ceph.dir.layout dir 2>&1 | grep -q 'No such attribute' + +echo OK + diff --git a/qa/workunits/fs/misc/mkpool_layout_vxattrs.sh b/qa/workunits/fs/misc/mkpool_layout_vxattrs.sh new file mode 100755 index 000000000..6b2fecbc0 --- /dev/null +++ b/qa/workunits/fs/misc/mkpool_layout_vxattrs.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -e + +touch foo.$$ +ceph osd pool create foo.$$ 8 +ceph fs add_data_pool cephfs foo.$$ +setfattr -n ceph.file.layout.pool -v foo.$$ foo.$$ + +# cleanup +rm foo.$$ +ceph fs rm_data_pool cephfs foo.$$ +ceph osd pool rm foo.$$ foo.$$ --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/fs/misc/multiple_rsync.sh b/qa/workunits/fs/misc/multiple_rsync.sh new file mode 100755 index 000000000..4397c1e7f --- /dev/null +++ b/qa/workunits/fs/misc/multiple_rsync.sh @@ -0,0 +1,25 @@ +#!/bin/sh -ex + + +# Populate with some arbitrary files from the local system. Take +# a copy to protect against false fails from system updates during test. +export PAYLOAD=/tmp/multiple_rsync_payload.$$ +sudo cp -r /usr/lib/ $PAYLOAD + +set -e + +sudo rsync -av $PAYLOAD payload.1 +sudo rsync -av $PAYLOAD payload.2 + +# this shouldn't transfer any additional files +echo we should get 4 here if no additional files are transferred +sudo rsync -auv $PAYLOAD payload.1 | tee /tmp/$$ +hexdump -C /tmp/$$ +wc -l /tmp/$$ | grep 4 +sudo rsync -auv $PAYLOAD payload.2 | tee /tmp/$$ +hexdump -C /tmp/$$ +wc -l /tmp/$$ | grep 4 +echo OK + +rm /tmp/$$ +sudo rm -rf $PAYLOAD diff --git a/qa/workunits/fs/misc/rstats.sh b/qa/workunits/fs/misc/rstats.sh new file mode 100755 index 000000000..e6b3eddf2 --- /dev/null +++ b/qa/workunits/fs/misc/rstats.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +set -x + +timeout=30 +old_value="" +new_value="" + +wait_until_changed() { + name=$1 + wait=0 + while [ $wait -lt $timeout ]; do + new_value=`getfattr --only-value -n ceph.dir.$name .` + [ $new_value == $old_value ] || return 0 + sleep 1 + wait=$(($wait + 1)) + done + return 1 +} + +check_rctime() { + old_sec=$(echo $old_value | cut -d. -f1) + old_nsec=$(echo $old_value | cut -d. -f2) + new_sec=$(echo $new_value | cut -d. -f1) + new_nsec=$(echo $new_value | cut -d. 
-f2) + [ "$old_sec" -lt "$new_sec" ] && return 0 + [ "$old_sec" -gt "$new_sec" ] && return 1 + [ "$old_nsec" -lt "$new_nsec" ] && return 0 + return 1 +} + +# sync(3) does not make ceph-fuse flush dirty caps, because fuse kernel module +# does not notify ceph-fuse about it. Use fsync(3) instead. +fsync_path() { + cmd="import os; fd=os.open(\"$1\", os.O_RDONLY); os.fsync(fd); os.close(fd)" + python3 -c "$cmd" +} + +set -e + +mkdir -p rstats_testdir/d1/d2 +cd rstats_testdir + +# rfiles +old_value=`getfattr --only-value -n ceph.dir.rfiles .` +[ $old_value == 0 ] || false +touch d1/d2/f1 +wait_until_changed rfiles +[ $new_value == $(($old_value + 1)) ] || false + +# rsubdirs +old_value=`getfattr --only-value -n ceph.dir.rsubdirs .` +[ $old_value == 3 ] || false +mkdir d1/d2/d3 +wait_until_changed rsubdirs +[ $new_value == $(($old_value + 1)) ] || false + +# rbytes +old_value=`getfattr --only-value -n ceph.dir.rbytes .` +[ $old_value == 0 ] || false +echo hello > d1/d2/f2 +fsync_path d1/d2/f2 +wait_until_changed rbytes +[ $new_value == $(($old_value + 6)) ] || false + +#rctime +old_value=`getfattr --only-value -n ceph.dir.rctime .` +touch d1/d2/d3 # touch existing file +fsync_path d1/d2/d3 +wait_until_changed rctime +check_rctime + +old_value=`getfattr --only-value -n ceph.dir.rctime .` +touch d1/d2/f3 # create new file +wait_until_changed rctime +check_rctime + +cd .. +rm -rf rstats_testdir +echo OK diff --git a/qa/workunits/fs/misc/trivial_sync.sh b/qa/workunits/fs/misc/trivial_sync.sh new file mode 100755 index 000000000..7c8c4e2b4 --- /dev/null +++ b/qa/workunits/fs/misc/trivial_sync.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -e + +mkdir foo +echo foo > bar +sync diff --git a/qa/workunits/fs/misc/xattrs.sh b/qa/workunits/fs/misc/xattrs.sh new file mode 100755 index 000000000..fcd94d22c --- /dev/null +++ b/qa/workunits/fs/misc/xattrs.sh @@ -0,0 +1,14 @@ +#!/bin/sh -x + +set -e + +touch file + +setfattr -n user.foo -v foo file +setfattr -n user.bar -v bar file +setfattr -n user.empty file +getfattr -d file | grep foo +getfattr -d file | grep bar +getfattr -d file | grep empty + +echo OK. 
diff --git a/qa/workunits/fs/multiclient_sync_read_eof.py b/qa/workunits/fs/multiclient_sync_read_eof.py new file mode 100755 index 000000000..15ecbb825 --- /dev/null +++ b/qa/workunits/fs/multiclient_sync_read_eof.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 + +import argparse +import os + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('mnt1') + parser.add_argument('mnt2') + parser.add_argument('fn') + args = parser.parse_args() + + open(os.path.join(args.mnt1, args.fn), 'w') + f1 = open(os.path.join(args.mnt1, args.fn), 'r+') + f2 = open(os.path.join(args.mnt2, args.fn), 'r+') + + f1.write('foo') + f1.flush() + a = f2.read(3) + print('got "%s"' % a) + assert a == 'foo' + f2.write('bar') + f2.flush() + a = f1.read(3) + print('got "%s"' % a) + assert a == 'bar' + + ## test short reads + f1.write('short') + f1.flush() + a = f2.read(100) + print('got "%s"' % a) + assert a == 'short' + f2.write('longer') + f2.flush() + a = f1.read(1000) + print('got "%s"' % a) + assert a == 'longer' + + print('ok') + +main() diff --git a/qa/workunits/fs/norstats/kernel_untar_tar.sh b/qa/workunits/fs/norstats/kernel_untar_tar.sh new file mode 100755 index 000000000..6a175dcd9 --- /dev/null +++ b/qa/workunits/fs/norstats/kernel_untar_tar.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# check if there is file changed while being archived + +set -e + +KERNEL=linux-4.0.5 + +wget -q http://download.ceph.com/qa/$KERNEL.tar.xz + +mkdir untar_tar +cd untar_tar + +tar Jxvf ../$KERNEL.tar.xz $KERNEL/Documentation/ +tar cf doc.tar $KERNEL + +tar xf doc.tar +sync +tar c $KERNEL >/dev/null + +rm -rf $KERNEL + +tar xf doc.tar +sync +tar c $KERNEL >/dev/null + +echo Ok diff --git a/qa/workunits/fs/quota/quota.sh b/qa/workunits/fs/quota/quota.sh new file mode 100755 index 000000000..1315be6d8 --- /dev/null +++ b/qa/workunits/fs/quota/quota.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function write_file() +{ + set +x + for ((i=1;i<=$2;i++)) + do + dd if=/dev/zero of=$1 bs=1M count=1 conv=notrunc oflag=append 2>/dev/null >/dev/null + if [ $? != 0 ]; then + echo Try to write $(($i * 1048576)) + set -x + return 1 + fi + sleep 0.05 + done + set -x + return 0 +} + +mkdir quota-test +cd quota-test + +# bytes +setfattr . -n ceph.quota.max_bytes -v 100000000 # 100m +expect_false write_file big 1000 # 1g +expect_false write_file second 10 +setfattr . -n ceph.quota.max_bytes -v 0 +dd if=/dev/zero of=third bs=1M count=10 +dd if=/dev/zero of=big2 bs=1M count=100 + + +rm -rf * + +# files +setfattr . -n ceph.quota.max_files -v 5 +mkdir ok +touch ok/1 +touch ok/2 +touch 3 +expect_false touch shouldbefail # 5 files will include the "." +expect_false touch ok/shouldbefail # 5 files will include the "." +setfattr . 
-n ceph.quota.max_files -v 0 +touch shouldbecreated +touch shouldbecreated2 + + +rm -rf * + +# mix +mkdir bytes bytes/files + +setfattr bytes -n ceph.quota.max_bytes -v 10000000 #10m +setfattr bytes/files -n ceph.quota.max_files -v 5 +dd if=/dev/zero of=bytes/files/1 bs=1M count=4 +dd if=/dev/zero of=bytes/files/2 bs=1M count=4 +expect_false write_file bytes/files/3 1000 +expect_false write_file bytes/files/4 1000 +expect_false write_file bytes/files/5 1000 +stat --printf="%n %s\n" bytes/files/1 #4M +stat --printf="%n %s\n" bytes/files/2 #4M +stat --printf="%n %s\n" bytes/files/3 #bigger than 2M +stat --printf="%n %s\n" bytes/files/4 #should be zero +expect_false stat bytes/files/5 #shouldn't be exist + + + + +rm -rf * + +#mv +mkdir files limit +truncate files/file -s 10G +setfattr limit -n ceph.quota.max_bytes -v 1000000 #1m +expect_false mv files limit/ + + + +rm -rf * + +#limit by ancestor + +mkdir -p ancestor/p1/p2/parent/p3 +setfattr ancestor -n ceph.quota.max_bytes -v 1000000 +setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1000000000 #1g +expect_false write_file ancestor/p1/p2/parent/p3/file1 900 #900m +stat --printf="%n %s\n" ancestor/p1/p2/parent/p3/file1 + + +#get/set attribute + +setfattr -n ceph.quota.max_bytes -v 0 . +setfattr -n ceph.quota.max_bytes -v 1 . +setfattr -n ceph.quota.max_bytes -v 9223372036854775807 . +expect_false setfattr -n ceph.quota.max_bytes -v 9223372036854775808 . +expect_false setfattr -n ceph.quota.max_bytes -v -1 . +expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775808 . +expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775809 . + +setfattr -n ceph.quota.max_files -v 0 . +setfattr -n ceph.quota.max_files -v 1 . +setfattr -n ceph.quota.max_files -v 9223372036854775807 . +expect_false setfattr -n ceph.quota.max_files -v 9223372036854775808 . +expect_false setfattr -n ceph.quota.max_files -v -1 . +expect_false setfattr -n ceph.quota.max_files -v -9223372036854775808 . +expect_false setfattr -n ceph.quota.max_files -v -9223372036854775809 . + +setfattr -n ceph.quota -v "max_bytes=0 max_files=0" . +setfattr -n ceph.quota -v "max_bytes=1 max_files=0" . +setfattr -n ceph.quota -v "max_bytes=0 max_files=1" . +setfattr -n ceph.quota -v "max_bytes=1 max_files=1" . +expect_false setfattr -n ceph.quota -v "max_bytes=-1 max_files=0" . +expect_false setfattr -n ceph.quota -v "max_bytes=0 max_files=-1" . +expect_false setfattr -n ceph.quota -v "max_bytes=-1 max_files=-1" . + +#addme + +cd .. 
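+# (the largest quota accepted above, 9223372036854775807, is 2^63-1; anything
+# bigger or negative is expected to be rejected, matching a signed 64-bit range)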
+rm -rf quota-test + +echo OK diff --git a/qa/workunits/fs/snap-hierarchy.sh b/qa/workunits/fs/snap-hierarchy.sh new file mode 100755 index 000000000..67f0e014b --- /dev/null +++ b/qa/workunits/fs/snap-hierarchy.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -ex + +if [ -d "$1" ]; then + mkdir -p -- "$1" && cd "$1" +fi + +[ "$VERIFY" != verify ] && mkdir 1 +[ "$VERIFY" != verify ] && mkdir 1/.snap/first +stat 1/.snap/first +[ "$VERIFY" != verify ] && mkdir 1/2 +stat 1/.snap/first/2 && exit 1 +[ "$VERIFY" != verify ] && mkdir 1/2/.snap/second +stat 1/2/.snap/second +[ "$VERIFY" != verify ] && touch 1/foo +stat 1/.snap/first/foo && exit 1 +[ "$VERIFY" != verify ] && mkdir 1/.snap/third +stat 1/.snap/third/foo || exit 1 +[ "$VERIFY" != verify ] && mkdir 1/2/3 +[ "$VERIFY" != verify ] && mkdir 1/2/.snap/fourth +stat 1/2/.snap/fourth/3 + +exit 0 diff --git a/qa/workunits/fs/snaps/snap-rm-diff.sh b/qa/workunits/fs/snaps/snap-rm-diff.sh new file mode 100755 index 000000000..30ffa9113 --- /dev/null +++ b/qa/workunits/fs/snaps/snap-rm-diff.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex + +wget -q http://download.ceph.com/qa/linux-2.6.33.tar.bz2 +mkdir foo +cp linux* foo +mkdir foo/.snap/barsnap +rm foo/linux* +diff -q foo/.snap/barsnap/linux* linux* && echo "passed: files are identical" +rmdir foo/.snap/barsnap +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-1.sh b/qa/workunits/fs/snaps/snaptest-1.sh new file mode 100755 index 000000000..431e83387 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-1.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -ex + +echo 1 > file1 +echo 2 > file2 +echo 3 > file3 +[ -e file4 ] && rm file4 +mkdir .snap/snap1 +echo 4 > file4 +now=`ls` +then=`ls .snap/snap1` +rmdir .snap/snap1 +if [ "$now" = "$then" ]; then + echo live and snap contents are identical? + false +fi + +# do it again +echo 1 > file1 +echo 2 > file2 +echo 3 > file3 +mkdir .snap/snap1 +echo 4 > file4 +rmdir .snap/snap1 + +rm file? + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-2.sh b/qa/workunits/fs/snaps/snaptest-2.sh new file mode 100755 index 000000000..11fe9316a --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-2.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +echo "Create dir 100 to 199 ..." +for i in $(seq 100 199); do + echo " create dir $i" + mkdir "$i" + for y in $(seq 10 20); do + echo "This is a test file before any snapshot was taken." >"$i/$y" + done +done + +echo "Take first snapshot .snap/test1" +mkdir .snap/test1 + +echo "Create dir 200 to 299 ..." +for i in $(seq 200 299); do + echo " create dir $i" + mkdir $i + for y in $(seq 20 29); do + echo "This is a test file. Created after .snap/test1" >"$i/$y" + done +done + +echo "Create a snapshot in every first level dir ..." +for dir in $(ls); do + echo " create $dir/.snap/snap-subdir-test" + mkdir "$dir/.snap/snap-subdir-test" + for y in $(seq 30 39); do + echo " create $dir/$y file after the snapshot" + echo "This is a test file. Created after $dir/.snap/snap-subdir-test" >"$dir/$y" + done +done + +echo "Take second snapshot .snap/test2" +mkdir .snap/test2 + +echo "Copy content of .snap/test1 to copyofsnap1 ..." +mkdir copyofsnap1 +cp -Rv .snap/test1 copyofsnap1/ + + +echo "Take third snapshot .snap/test3" +mkdir .snap/test3 + +echo "Delete the snapshots..." + +find ./ -type d -print | \ + xargs -I% -n1 find %/.snap -mindepth 1 -maxdepth 1 \ + \( ! -name "_*" \) -print 2>/dev/null + +find ./ -type d -print | \ + xargs -I% -n1 find %/.snap -mindepth 1 -maxdepth 1 \ + \( ! 
-name "_*" \) -print 2>/dev/null | \ + xargs -n1 rmdir + +echo "Delete all the files and directories ..." +rm -Rfv ./* + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-authwb.sh b/qa/workunits/fs/snaps/snaptest-authwb.sh new file mode 100755 index 000000000..965ee8512 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-authwb.sh @@ -0,0 +1,12 @@ +#!/bin/sh -x + +set -e + +touch foo +chmod +x foo +mkdir .snap/s +find .snap/s/foo -executable | grep foo +rmdir .snap/s +rm foo + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-capwb.sh b/qa/workunits/fs/snaps/snaptest-capwb.sh new file mode 100755 index 000000000..d26f324b6 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-capwb.sh @@ -0,0 +1,33 @@ +#!/bin/sh -x + +set -e + +mkdir foo + +# make sure mds handles it when the client does not send flushsnap +echo x > foo/x +sync +mkdir foo/.snap/ss +ln foo/x foo/xx +cat foo/.snap/ss/x +rmdir foo/.snap/ss + +# +echo a > foo/a +echo b > foo/b +mkdir foo/.snap/s +r=`cat foo/.snap/s/a` +[ -z "$r" ] && echo "a appears empty in snapshot" && false + +ln foo/b foo/b2 +cat foo/.snap/s/b + +echo "this used to hang:" +echo more >> foo/b2 +echo "oh, it didn't hang! good job." +cat foo/b +rmdir foo/.snap/s + +rm -r foo + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-dir-rename.sh b/qa/workunits/fs/snaps/snaptest-dir-rename.sh new file mode 100755 index 000000000..3bbd9a11e --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-dir-rename.sh @@ -0,0 +1,17 @@ +#!/bin/sh -x + +set -e + +# +# make sure we keep an existing dn's seq +# + +mkdir a +mkdir .snap/bar +mkdir a/.snap/foo +rmdir a/.snap/foo +rmdir a +stat .snap/bar/a +rmdir .snap/bar + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-double-null.sh b/qa/workunits/fs/snaps/snaptest-double-null.sh new file mode 100755 index 000000000..cdf32e4f0 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-double-null.sh @@ -0,0 +1,23 @@ +#!/bin/sh -x + +set -e + +# multiple intervening snapshots with no modifications, and thus no +# snapflush client_caps messages. make sure the mds can handle this. + +for f in `seq 1 20` ; do + +mkdir a +cat > a/foo & +mkdir a/.snap/one +mkdir a/.snap/two +chmod 777 a/foo +sync # this might crash the mds +ps +rmdir a/.snap/* +rm a/foo +rmdir a + +done + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-estale.sh b/qa/workunits/fs/snaps/snaptest-estale.sh new file mode 100755 index 000000000..a4fb94368 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-estale.sh @@ -0,0 +1,13 @@ +#!/bin/sh -x + +mkdir .snap/foo + +echo "We want ENOENT, not ESTALE, here." +for f in `seq 1 100` +do + stat .snap/foo/$f 2>&1 | grep 'No such file' +done + +rmdir .snap/foo + +echo "OK" diff --git a/qa/workunits/fs/snaps/snaptest-git-ceph.sh b/qa/workunits/fs/snaps/snaptest-git-ceph.sh new file mode 100755 index 000000000..12c1f0fdc --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-git-ceph.sh @@ -0,0 +1,52 @@ +#!/bin/sh -x + +set -e + +# try it again if the clone is slow and the second time +retried=false +trap -- 'retry' EXIT +retry() { + rm -rf ceph + # double the timeout value + timeout 3600 git clone https://git.ceph.com/ceph.git +} +rm -rf ceph +timeout 1800 git clone https://git.ceph.com/ceph.git +trap - EXIT +cd ceph + +versions=`seq 1 90` + +for v in $versions +do + if [ $v -eq 48 ]; then + continue + fi + ver="v0.$v" + echo $ver + git reset --hard $ver + mkdir .snap/$ver +done + +for v in $versions +do + if [ $v -eq 48 ]; then + continue + fi + ver="v0.$v" + echo checking $ver + cd .snap/$ver + git diff --exit-code + cd ../.. 
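+    # 'git diff --exit-code' run inside the snapshot must report no changes:
+    # each .snap/$ver was taken right after 'git reset --hard $ver', so the
+    # snapshotted tree has to match that tag exactly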
+done + +for v in $versions +do + if [ $v -eq 48 ]; then + continue + fi + ver="v0.$v" + rmdir .snap/$ver +done + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-hardlink.sh b/qa/workunits/fs/snaps/snaptest-hardlink.sh new file mode 100755 index 000000000..90f3583b1 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-hardlink.sh @@ -0,0 +1,25 @@ +#!/bin/sh -x + +set -e + +mkdir 1 2 +echo asdf >1/file1 +echo asdf >1/file2 + +ln 1/file1 2/file1 +ln 1/file2 2/file2 + +mkdir 2/.snap/s1 + +echo qwer >1/file1 +grep asdf 2/.snap/s1/file1 + +rm -f 1/file2 +grep asdf 2/.snap/s1/file2 +rm -f 2/file2 +grep asdf 2/.snap/s1/file2 + +rmdir 2/.snap/s1 +rm -rf 1 2 + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-intodir.sh b/qa/workunits/fs/snaps/snaptest-intodir.sh new file mode 100755 index 000000000..d6a220f73 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-intodir.sh @@ -0,0 +1,22 @@ +#!/bin/sh -ex + +# this tests fix for #1399 +mkdir foo +mkdir foo/.snap/one +touch bar +mv bar foo +sync +# should not crash :) + +mkdir baz +mkdir baz/.snap/two +mv baz foo +sync +# should not crash :) + +# clean up. +rmdir foo/baz/.snap/two +rmdir foo/.snap/one +rm -r foo + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh b/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh new file mode 100755 index 000000000..5ebc852cf --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh @@ -0,0 +1,42 @@ +#!/bin/sh -x + +set -e + +echo asdf > a +mkdir .snap/1 +chmod 777 a +mkdir .snap/2 +echo qwer > a +mkdir .snap/3 +chmod 666 a +mkdir .snap/4 +echo zxcv > a +mkdir .snap/5 + +ls -al .snap/?/a + +grep asdf .snap/1/a +stat .snap/1/a | grep 'Size: 5' + +grep asdf .snap/2/a +stat .snap/2/a | grep 'Size: 5' +stat .snap/2/a | grep -- '-rwxrwxrwx' + +grep qwer .snap/3/a +stat .snap/3/a | grep 'Size: 5' +stat .snap/3/a | grep -- '-rwxrwxrwx' + +grep qwer .snap/4/a +stat .snap/4/a | grep 'Size: 5' +stat .snap/4/a | grep -- '-rw-rw-rw-' + +grep zxcv .snap/5/a +stat .snap/5/a | grep 'Size: 5' +stat .snap/5/a | grep -- '-rw-rw-rw-' + +rmdir .snap/[12345] + +echo "OK" + + + diff --git a/qa/workunits/fs/snaps/snaptest-name-limits.sh b/qa/workunits/fs/snaps/snaptest-name-limits.sh new file mode 100755 index 000000000..f40d0231e --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-name-limits.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# This tests snapshot names limits: names have to be < 240 chars +# + +function cleanup () +{ + rmdir d1/.snap/* + rm -rf d1 +} + +function fail () +{ + echo $@ + cleanup + exit 1 +} + +mkdir d1 + +longname=$(printf "%.241d" 2) +mkdir d1/.snap/$longname 2> /dev/null +[ -d d1/.snap/$longname ] && fail "Invalid snapshot exists: $longname" + +cleanup + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-parents.sh b/qa/workunits/fs/snaps/snaptest-parents.sh new file mode 100755 index 000000000..7ab1ba7cf --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-parents.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +set -e + +echo "making directory tree and files" +mkdir -p 1/a/b/c/ +echo "i'm file1" > 1/a/file1 +echo "i'm file2" > 1/a/b/file2 +echo "i'm file3" > 1/a/b/c/file3 +echo "snapshotting" +mkdir 1/.snap/foosnap1 +mkdir 2 +echo "moving tree" +mv 1/a 2 +echo "checking snapshot contains tree..." +dir1=`find 1/.snap/foosnap1 | wc -w` +dir2=`find 2/ | wc -w` +#diff $dir1 $dir2 && echo "Success!" +test $dir1==$dir2 && echo "Success!" +echo "adding folder and file to tree..." +mkdir 2/a/b/c/d +echo "i'm file 4!" 
> 2/a/b/c/d/file4 +echo "snapshotting tree 2" +mkdir 2/.snap/barsnap2 +echo "comparing snapshots" +dir1=`find 1/.snap/foosnap1/ -maxdepth 2 | wc -w` +dir2=`find 2/.snap/barsnap2/ -maxdepth 2 | wc -w` +#diff $dir1 $dir2 && echo "Success!" +test $dir1==$dir2 && echo "Success!" +echo "moving subtree to first folder" +mv 2/a/b/c 1 +echo "comparing snapshots and new tree" +dir1=`find 1/ | wc -w` +dir2=`find 2/.snap/barsnap2/a/b/c | wc -w` +#diff $dir1 $dir2 && echo "Success!" +test $dir1==$dir2 && echo "Success!" +rmdir 1/.snap/* +rmdir 2/.snap/* +echo "OK" diff --git a/qa/workunits/fs/snaps/snaptest-realm-split.sh b/qa/workunits/fs/snaps/snaptest-realm-split.sh new file mode 100755 index 000000000..300cca21d --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-realm-split.sh @@ -0,0 +1,31 @@ +#!/bin/sh -x + +set -e + +mkdir -p 1/a +exec 3<> 1/a/file1 + +echo -n a >&3 + +mkdir 1/.snap/s1 + +echo -n b >&3 + +mkdir 2 +# create new snaprealm at dir a, file1's cap should be attached to the new snaprealm +mv 1/a 2 + +mkdir 2/.snap/s2 + +echo -n c >&3 + +exec 3>&- + +grep '^a$' 1/.snap/s1/a/file1 +grep '^ab$' 2/.snap/s2/a/file1 +grep '^abc$' 2/a/file1 + +rmdir 1/.snap/s1 +rmdir 2/.snap/s2 +rm -rf 1 2 +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-snap-rename.sh b/qa/workunits/fs/snaps/snaptest-snap-rename.sh new file mode 100755 index 000000000..aa7325b92 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-snap-rename.sh @@ -0,0 +1,33 @@ +#!/bin/sh -x + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} +set -e + +mkdir -p d1/d2 +mkdir -p d1/d3 +mkdir d1/.snap/foo +mkdir d1/d2/.snap/foo +mkdir d1/d3/.snap/foo +mkdir d1/d3/.snap/bar +mv d1/d2/.snap/foo d1/d2/.snap/bar +# snapshot name can't start with _ +expect_failure mv d1/d2/.snap/bar d1/d2/.snap/_bar +# can't rename parent snapshot +expect_failure mv d1/d2/.snap/_foo_* d1/d2/.snap/foo +expect_failure mv d1/d2/.snap/_foo_* d1/d2/.snap/_foo_1 +# can't rename snapshot to different directroy +expect_failure mv d1/d2/.snap/bar d1/.snap/ +# can't overwrite existing snapshot +expect_failure python3 -c "import os; os.rename('d1/d3/.snap/foo', 'd1/d3/.snap/bar')" +# can't move snaphost out of snapdir +expect_failure python3 -c "import os; os.rename('d1/.snap/foo', 'd1/foo')" + +rmdir d1/.snap/foo +rmdir d1/d2/.snap/bar +rmdir d1/d3/.snap/foo +rmdir d1/d3/.snap/bar +rm -rf d1 + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh b/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh new file mode 100755 index 000000000..88a0e8ae5 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh @@ -0,0 +1,24 @@ +#!/bin/sh -x + +set -e + +file=linux-2.6.33.tar.bz2 +wget -q http://download.ceph.com/qa/$file + +real=`md5sum $file | awk '{print $1}'` + +for f in `seq 1 20` +do + echo $f + cp $file a + mkdir .snap/s + rm a + cp .snap/s/a /tmp/a + cur=`md5sum /tmp/a | awk '{print $1}'` + if [ "$cur" != "$real" ]; then + echo "FAIL: bad match, /tmp/a $cur != real $real" + false + fi + rmdir .snap/s +done +rm $file diff --git a/qa/workunits/fs/snaps/snaptest-upchildrealms.sh b/qa/workunits/fs/snaps/snaptest-upchildrealms.sh new file mode 100755 index 000000000..4e531a966 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-upchildrealms.sh @@ -0,0 +1,28 @@ +#!/bin/sh -x + +set -e + +# +# verify that a snap update on a parent realm will induce +# snap cap writeback for inodes child realms +# + +mkdir a +mkdir a/b +mkdir a/.snap/a1 +mkdir a/b/.snap/b1 +echo asdf > a/b/foo +mkdir a/.snap/a2 +# client _should_ have just queued a capsnap for 
writeback +ln a/b/foo a/b/bar # make the server cow the inode + +echo "this should not hang..." +cat a/b/.snap/_a2_*/foo +echo "good, it did not hang." + +rmdir a/b/.snap/b1 +rmdir a/.snap/a1 +rmdir a/.snap/a2 +rm -r a + +echo "OK" diff --git a/qa/workunits/fs/snaps/snaptest-xattrwb.sh b/qa/workunits/fs/snaps/snaptest-xattrwb.sh new file mode 100755 index 000000000..e503aed77 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-xattrwb.sh @@ -0,0 +1,29 @@ +#!/bin/sh -x + +set -e + +echo "testing simple xattr wb" +touch x +setfattr -n user.foo x +mkdir .snap/s1 +getfattr -n user.foo .snap/s1/x | grep user.foo +rm x +rmdir .snap/s1 + +echo "testing wb with pre-wb server cow" +mkdir a +mkdir a/b +mkdir a/b/c +# b now has As but not Ax +setfattr -n user.foo a/b +mkdir a/.snap/s +mkdir a/b/cc +# b now has been cowed on the server, but we still have dirty xattr caps +getfattr -n user.foo a/b # there they are... +getfattr -n user.foo a/.snap/s/b | grep user.foo # should be there, too! + +# ok, clean up +rmdir a/.snap/s +rm -r a + +echo OK diff --git a/qa/workunits/fs/snaps/untar_snap_rm.sh b/qa/workunits/fs/snaps/untar_snap_rm.sh new file mode 100755 index 000000000..8a8412e66 --- /dev/null +++ b/qa/workunits/fs/snaps/untar_snap_rm.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +set -e + +do_tarball() { + wget http://download.ceph.com/qa/$1 + tar xvf$2 $1 + mkdir .snap/k + sync + rm -rv $3 + cp -av .snap/k . + rmdir .snap/k + rm -rv k + rm $1 +} + +do_tarball coreutils_8.5.orig.tar.gz z coreutils-8.5 +do_tarball linux-2.6.33.tar.bz2 j linux-2.6.33 diff --git a/qa/workunits/fs/test_o_trunc.c b/qa/workunits/fs/test_o_trunc.c new file mode 100644 index 000000000..1ce19e4bb --- /dev/null +++ b/qa/workunits/fs/test_o_trunc.c @@ -0,0 +1,45 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + char obuf[32], ibuf[1024]; + int n, max = 0; + + if (argc > 2) + max = atoi(argv[2]); + if (!max) + max = 600; + + memset(obuf, 0xff, sizeof(obuf)); + + for (n = 1; n <= max; ++n) { + int fd, ret; + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0644); + printf("%d/%d: open fd = %d\n", n, max, fd); + + ret = write(fd, obuf, sizeof(obuf)); + printf("write ret = %d\n", ret); + + sleep(1); + + ret = write(fd, obuf, sizeof(obuf)); + printf("write ret = %d\n", ret); + + ret = pread(fd, ibuf, sizeof(ibuf), 0); + printf("pread ret = %d\n", ret); + + if (memcmp(obuf, ibuf, sizeof(obuf))) { + printf("mismatch\n"); + close(fd); + break; + } + close(fd); + } + return 0; +} diff --git a/qa/workunits/fs/test_o_trunc.sh b/qa/workunits/fs/test_o_trunc.sh new file mode 100755 index 000000000..90a72600d --- /dev/null +++ b/qa/workunits/fs/test_o_trunc.sh @@ -0,0 +1,7 @@ +#!/bin/sh -ex + +mydir=`dirname $0` +$mydir/test_o_trunc trunc.foo 600 + +echo OK + diff --git a/qa/workunits/fs/test_python.sh b/qa/workunits/fs/test_python.sh new file mode 100755 index 000000000..6e39b95a4 --- /dev/null +++ b/qa/workunits/fs/test_python.sh @@ -0,0 +1,6 @@ +#!/bin/sh -ex + +# Running as root because the filesystem root directory will be +# owned by uid 0, and that's where we're writing. 
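+# The ../../../ in the path below walks from qa/workunits/fs back to the root
+# of the ceph checkout, where the pybind test module lives.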
+sudo python3 -m pytest -v $(dirname $0)/../../../src/test/pybind/test_cephfs.py +exit 0 diff --git a/qa/workunits/hadoop/repl.sh b/qa/workunits/hadoop/repl.sh new file mode 100755 index 000000000..84f6150ab --- /dev/null +++ b/qa/workunits/hadoop/repl.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -e +set -x + +# bail if $TESTDIR is not set as this test will fail in that scenario +[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; } + +# if HADOOP_PREFIX is not set, use default +[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; } + +# create pools with different replication factors +for repl in 2 3 7 8 9; do + name=hadoop.$repl + ceph osd pool create $name 8 8 + ceph osd pool set $name size $repl + + id=`ceph osd dump | sed -n "s/^pool \([0-9]*\) '$name'.*/\1/p"` + ceph fs add_data_pool cephfs $id +done + +# create a file in each of the pools +for repl in 2 3 7 8 9; do + name=hadoop.$repl + $HADOOP_PREFIX/bin/hadoop fs -rm -f /$name.dat + dd if=/dev/zero bs=1048576 count=1 | \ + $HADOOP_PREFIX/bin/hadoop fs -Dceph.data.pools="$name" \ + -put - /$name.dat +done + +# check that hadoop reports replication matching +# that of the pool the file was written into +for repl in 2 3 7 8 9; do + name=hadoop.$repl + repl2=$($HADOOP_PREFIX/bin/hadoop fs -ls /$name.dat | awk '{print $2}') + if [ $repl -ne $repl2 ]; then + echo "replication factors didn't match!" + exit 1 + fi +done + +exit 0 diff --git a/qa/workunits/hadoop/terasort.sh b/qa/workunits/hadoop/terasort.sh new file mode 100755 index 000000000..3d6988a21 --- /dev/null +++ b/qa/workunits/hadoop/terasort.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +set -e +set -x + +INPUT=/terasort-input +OUTPUT=/terasort-output +REPORT=/tersort-report + +num_records=100000 +[ ! -z $NUM_RECORDS ] && num_records=$NUM_RECORDS + +# bail if $TESTDIR is not set as this test will fail in that scenario +[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. 
Exiting."; exit 1; } + +# if HADOOP_PREFIX is not set, use default +[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; } + +# Nuke hadoop directories +$HADOOP_PREFIX/bin/hadoop fs -rm -r $INPUT $OUTPUT $REPORT || true + +# Generate terasort data +# +#-Ddfs.blocksize=512M \ +#-Dio.file.buffer.size=131072 \ +#-Dmapreduce.map.java.opts=-Xmx1536m \ +#-Dmapreduce.map.memory.mb=2048 \ +#-Dmapreduce.task.io.sort.mb=256 \ +#-Dyarn.app.mapreduce.am.resource.mb=1024 \ +#-Dmapred.map.tasks=64 \ +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + teragen \ + -Dmapred.map.tasks=9 \ + $num_records \ + $INPUT + +# Run the sort job +# +#-Ddfs.blocksize=512M \ +#-Dio.file.buffer.size=131072 \ +#-Dmapreduce.map.java.opts=-Xmx1536m \ +#-Dmapreduce.map.memory.mb=2048 \ +#-Dmapreduce.map.output.compress=true \ +#-Dmapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.Lz4Codec \ +#-Dmapreduce.reduce.java.opts=-Xmx1536m \ +#-Dmapreduce.reduce.memory.mb=2048 \ +#-Dmapreduce.task.io.sort.factor=100 \ +#-Dmapreduce.task.io.sort.mb=768 \ +#-Dyarn.app.mapreduce.am.resource.mb=1024 \ +#-Dmapred.reduce.tasks=100 \ +#-Dmapreduce.terasort.output.replication=1 \ +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + terasort \ + -Dmapred.reduce.tasks=10 \ + $INPUT $OUTPUT + +# Validate the sorted data +# +#-Ddfs.blocksize=512M \ +#-Dio.file.buffer.size=131072 \ +#-Dmapreduce.map.java.opts=-Xmx1536m \ +#-Dmapreduce.map.memory.mb=2048 \ +#-Dmapreduce.reduce.java.opts=-Xmx1536m \ +#-Dmapreduce.reduce.memory.mb=2048 \ +#-Dmapreduce.task.io.sort.mb=256 \ +#-Dyarn.app.mapreduce.am.resource.mb=1024 \ +#-Dmapred.reduce.tasks=1 \ +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + teravalidate \ + -Dmapred.reduce.tasks=1 \ + $OUTPUT $REPORT + +exit 0 diff --git a/qa/workunits/hadoop/wordcount.sh b/qa/workunits/hadoop/wordcount.sh new file mode 100755 index 000000000..616b08af2 --- /dev/null +++ b/qa/workunits/hadoop/wordcount.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +set -e +set -x + +WC_INPUT=/wc_input +WC_OUTPUT=/wc_output +DATA_INPUT=$(mktemp -d) + +echo "starting hadoop-wordcount test" + +# bail if $TESTDIR is not set as this test will fail in that scenario +[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; } + +# if HADOOP_PREFIX is not set, use default +[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; } + +# Nuke hadoop directories +$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true + +# Fetch and import testing data set +curl http://download.ceph.com/qa/hadoop_input_files.tar | tar xf - -C $DATA_INPUT +$HADOOP_PREFIX/bin/hadoop fs -copyFromLocal $DATA_INPUT $WC_INPUT +rm -rf $DATA_INPUT + +# Run the job +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + wordcount $WC_INPUT $WC_OUTPUT + +# Cleanup +$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true + +echo "completed hadoop-wordcount test" +exit 0 diff --git a/qa/workunits/kernel_untar_build.sh b/qa/workunits/kernel_untar_build.sh new file mode 100755 index 000000000..9b60f065c --- /dev/null +++ b/qa/workunits/kernel_untar_build.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -e + +wget -O linux.tar.gz http://download.ceph.com/qa/linux-5.4.tar.gz + +mkdir t +cd t +tar xzf ../linux.tar.gz +cd linux* +make defconfig +make -j`grep -c processor /proc/cpuinfo` +cd .. 
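+# the recursive rm of the build tree must remove everything cleanly; if it
+# fails, dump whatever was left behind so the failure is easy to diagnose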
+if ! rm -rv linux* ; then + echo "uh oh rm -r failed, it left behind:" + find . + exit 1 +fi +cd .. +rm -rv t linux* diff --git a/qa/workunits/libcephfs/test.sh b/qa/workunits/libcephfs/test.sh new file mode 100755 index 000000000..c53fe893c --- /dev/null +++ b/qa/workunits/libcephfs/test.sh @@ -0,0 +1,10 @@ +#!/bin/sh -e + +ceph_test_libcephfs +ceph_test_libcephfs_access +ceph_test_libcephfs_reclaim +ceph_test_libcephfs_lazyio +ceph_test_libcephfs_newops +ceph_test_libcephfs_suidsgid + +exit 0 diff --git a/qa/workunits/mgr/test_localpool.sh b/qa/workunits/mgr/test_localpool.sh new file mode 100755 index 000000000..40a749e8d --- /dev/null +++ b/qa/workunits/mgr/test_localpool.sh @@ -0,0 +1,21 @@ +#!/bin/sh -ex + +ceph config set mgr mgr/localpool/subtree host +ceph config set mgr mgr/localpool/failure_domain osd +ceph mgr module enable localpool + +while ! ceph osd pool ls | grep '^by-host-' +do + sleep 5 +done + +ceph mgr module disable localpool +for p in `ceph osd pool ls | grep '^by-host-'` +do + ceph osd pool rm $p $p --yes-i-really-really-mean-it +done + +ceph config rm mgr mgr/localpool/subtree +ceph config rm mgr mgr/localpool/failure_domain + +echo OK diff --git a/qa/workunits/mgr/test_per_module_finisher.sh b/qa/workunits/mgr/test_per_module_finisher.sh new file mode 100755 index 000000000..dc66bce23 --- /dev/null +++ b/qa/workunits/mgr/test_per_module_finisher.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the per module finisher stats for enabled modules +# using check counter (qa/tasks/check_counter.py). + +# 'balancer' commands +ceph balancer pool ls + +# 'crash' commands +ceph crash ls +ceph crash ls-new + +# 'device' commands +ceph device query-daemon-health-metrics mon.a + +# 'iostat' command +ceph iostat & +pid=$! +sleep 3 +kill -SIGTERM $pid + +# 'pg_autoscaler' command +ceph osd pool autoscale-status + +# 'progress' command +ceph progress +ceph progress json + +# 'status' commands +ceph fs status +ceph osd status + +# 'telemetry' commands +ceph telemetry status +ceph telemetry diff + +echo OK diff --git a/qa/workunits/mon/auth_caps.sh b/qa/workunits/mon/auth_caps.sh new file mode 100755 index 000000000..1f59ae1f7 --- /dev/null +++ b/qa/workunits/mon/auth_caps.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash + +set -e +set -x +declare -A keymap + +combinations="r w x rw rx wx rwx" + +for i in ${combinations}; do + k="foo_$i" + k=`ceph auth get-or-create-key client.$i mon "allow $i"` || exit 1 + keymap["$i"]=$k +done + +# add special caps +keymap["all"]=`ceph auth get-or-create-key client.all mon 'allow *'` || exit 1 + +tmp=`mktemp` +ceph auth export > $tmp + +trap "rm $tmp" INT ERR EXIT QUIT 0 + +expect() { + + set +e + + local expected_ret=$1 + local ret + + shift + cmd=$@ + + eval $cmd + ret=$? 
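+  # record the command's exit status while errexit is disabled; it is compared
+  # against the caller's expectation once 'set -e' is restored below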
+ + set -e + + if [[ $ret -ne $expected_ret ]]; then + echo "ERROR: running \'$cmd\': expected $expected_ret got $ret" + return 1 + fi + + return 0 +} + +read_ops() { + local caps=$1 + local has_read=1 has_exec=1 + local ret + local args + + ( echo $caps | grep 'r' ) || has_read=0 + ( echo $caps | grep 'x' ) || has_exec=0 + + if [[ "$caps" == "all" ]]; then + has_read=1 + has_exec=1 + fi + + ret=13 + if [[ $has_read -gt 0 && $has_exec -gt 0 ]]; then + ret=0 + fi + + args="--id $caps --key ${keymap[$caps]}" + + expect $ret ceph auth get client.admin $args + expect $ret ceph auth get-key client.admin $args + expect $ret ceph auth export $args + expect $ret ceph auth export client.admin $args + expect $ret ceph auth ls $args + expect $ret ceph auth print-key client.admin $args + expect $ret ceph auth print_key client.admin $args +} + +write_ops() { + + local caps=$1 + local has_read=1 has_write=1 has_exec=1 + local ret + local args + + ( echo $caps | grep 'r' ) || has_read=0 + ( echo $caps | grep 'w' ) || has_write=0 + ( echo $caps | grep 'x' ) || has_exec=0 + + if [[ "$caps" == "all" ]]; then + has_read=1 + has_write=1 + has_exec=1 + fi + + ret=13 + if [[ $has_read -gt 0 && $has_write -gt 0 && $has_exec -gt 0 ]]; then + ret=0 + fi + + args="--id $caps --key ${keymap[$caps]}" + + expect $ret ceph auth add client.foo $args + expect $ret "ceph auth caps client.foo mon 'allow *' $args" + expect $ret ceph auth get-or-create client.admin $args + expect $ret ceph auth get-or-create-key client.admin $args + expect $ret ceph auth get-or-create-key client.baz $args + expect $ret ceph auth del client.foo $args + expect $ret ceph auth del client.baz $args + expect $ret ceph auth import -i $tmp $args +} + +echo "running combinations: ${!keymap[@]}" + +subcmd=$1 + +for i in ${!keymap[@]}; do + echo "caps: $i" + if [[ -z "$subcmd" || "$subcmd" == "read" || "$subcmd" == "all" ]]; then + read_ops $i + fi + + if [[ -z "$subcmd" || "$subcmd" == "write" || "$subcmd" == "all" ]]; then + write_ops $i + fi +done + +# cleanup +for i in ${combinations} all; do + ceph auth del client.$i || exit 1 +done + +echo "OK" diff --git a/qa/workunits/mon/auth_key_rotation.sh b/qa/workunits/mon/auth_key_rotation.sh new file mode 100755 index 000000000..1a53bab6d --- /dev/null +++ b/qa/workunits/mon/auth_key_rotation.sh @@ -0,0 +1,58 @@ +#!/usr/bin/bash -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + + +ceph auth export +ceph auth rm client.rot + +ceph auth get-or-create client.rot mon 'allow rwx' +ceph auth export client.rot | grep key +ceph auth export client.rot | expect_false grep pending.key + +ceph auth get-or-create-pending client.rot +ceph auth export client.rot | grep key +ceph auth export client.rot | grep pending.key + +ceph auth clear-pending client.rot +ceph auth export client.rot | expect_false grep pending.key + +ceph auth get-or-create-pending client.rot +ceph auth export client.rot | grep key +ceph auth export client.rot | grep pending.key +K=$(ceph auth export client.rot | grep 'key = ' | head -n 1 | awk '{print $3}') +PK=$(ceph auth export client.rot | grep pending.key | awk '{print $4}') +echo "K is $K" +echo "PK is $PK" +ceph -n client.rot --key $K -s + +ceph auth commit-pending client.rot +ceph auth export client.rot | expect_false grep pending.key +ceph auth export client.rot | grep key | grep $PK + +ceph auth get-or-create-pending client.rot +ceph auth export client.rot | grep key +ceph auth export client.rot | grep pending.key +K=$(ceph auth export client.rot | 
grep 'key = ' | head -n 1 | awk '{print $3}') +PK=$(ceph auth export client.rot | grep pending.key | awk '{print $4}') +echo "2, K is $K" +echo "2, PK is $PK" + +ceph auth export client.rot + +while ceph -n client.rot --key $K -s ; do + ceph auth export client.rot + ceph -n client.rot --key $PK -s + sleep 1 +done + +ceph auth export client.rot | expect_false grep pending.key +ceph auth export client.rot | grep key | grep $PK + +ceph -n client.rot --key $PK -s + +echo ok diff --git a/qa/workunits/mon/caps.py b/qa/workunits/mon/caps.py new file mode 100644 index 000000000..26c0cd14c --- /dev/null +++ b/qa/workunits/mon/caps.py @@ -0,0 +1,359 @@ +#!/usr/bin/python3 + +from __future__ import print_function + +import subprocess +import shlex +import errno +import sys +import os +import io +import re + +from ceph_argparse import * # noqa + +keyring_base = '/tmp/cephtest-caps.keyring' + +class UnexpectedReturn(Exception): + def __init__(self, cmd, ret, expected, msg): + if isinstance(cmd, list): + self.cmd = ' '.join(cmd) + else: + assert isinstance(cmd, str), 'cmd needs to be either a list or a str' + self.cmd = cmd + self.cmd = str(self.cmd) + self.ret = int(ret) + self.expected = int(expected) + self.msg = str(msg) + + def __str__(self): + return repr('{c}: expected return {e}, got {r} ({o})'.format( + c=self.cmd, e=self.expected, r=self.ret, o=self.msg)) + +def call(cmd): + if isinstance(cmd, list): + args = cmd + elif isinstance(cmd, str): + args = shlex.split(cmd) + else: + assert False, 'cmd is not a string/unicode nor a list!' + + print('call: {0}'.format(args)) + proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = proc.wait() + + return (ret, proc) + +def expect(cmd, expected_ret): + + try: + (r, p) = call(cmd) + except ValueError as e: + print('unable to run {c}: {err}'.format(c=repr(cmd), err=e.message), + file=sys.stderr) + return errno.EINVAL + + assert r == p.returncode, \ + 'wth? r was supposed to match returncode!' + + if r != expected_ret: + raise UnexpectedReturn(repr(cmd), r, expected_ret, str(p.stderr.read())) + + return p + +def expect_to_file(cmd, expected_ret, out_file): + + # Let the exception be propagated to the caller + p = expect(cmd, expected_ret) + assert p.returncode == expected_ret, \ + 'expected result doesn\'t match and no exception was thrown!' + + with io.open(out_file, 'ab') as file: + file.write(p.stdout.read()) + + return p + +class Command: + def __init__(self, cid, j): + self.cid = cid[3:] + self.perms = j['perm'] + self.module = j['module'] + + self.sig = '' + self.args = [] + for s in j['sig']: + if not isinstance(s, dict): + assert isinstance(s, str), \ + 'malformatted signature cid {0}: {1}\n{2}'.format(cid,s,j) + if len(self.sig) > 0: + self.sig += ' ' + self.sig += s + else: + self.args.append(s) + + def __str__(self): + return repr('command {0}: {1} (requires \'{2}\')'.format(self.cid,\ + self.sig, self.perms)) + + +def destroy_keyring(path): + if not os.path.exists(path): + raise Exception('oops! 
cannot remove inexistent keyring {0}'.format(path)) + + # grab all client entities from the keyring + entities = [m.group(1) for m in [re.match(r'\[client\.(.*)\]', l) + for l in [str(line.strip()) + for line in io.open(path,'r')]] if m is not None] + + # clean up and make sure each entity is gone + for e in entities: + expect('ceph auth del client.{0}'.format(e), 0) + expect('ceph auth get client.{0}'.format(e), errno.ENOENT) + + # remove keyring + os.unlink(path) + + return True + +def test_basic_auth(): + # make sure we can successfully add/del entities, change their caps + # and import/export keyrings. + + expect('ceph auth add client.basicauth', 0) + expect('ceph auth caps client.basicauth mon \'allow *\'', 0) + # entity exists and caps do not match + expect('ceph auth add client.basicauth', errno.EINVAL) + # this command attempts to change an existing state and will fail + expect('ceph auth add client.basicauth mon \'allow w\'', errno.EINVAL) + expect('ceph auth get-or-create client.basicauth', 0) + expect('ceph auth get-key client.basicauth', 0) + expect('ceph auth get-or-create client.basicauth2', 0) + # cleanup + expect('ceph auth del client.basicauth', 0) + expect('ceph auth del client.basicauth2', 0) + + return True + +def gen_module_keyring(module): + module_caps = [ + ('all', '{t} \'allow service {s} rwx\'', 0), + ('none', '', errno.EACCES), + ('wrong', '{t} \'allow service foobar rwx\'', errno.EACCES), + ('right', '{t} \'allow service {s} {p}\'', 0), + ('no-execute', '{t} \'allow service {s} x\'', errno.EACCES) + ] + + keyring = '{0}.service-{1}'.format(keyring_base,module) + for perms in 'r rw x'.split(): + for (n,p,r) in module_caps: + c = p.format(t='mon', s=module, p=perms) + expect_to_file( + 'ceph auth get-or-create client.{cn}-{cp} {caps}'.format( + cn=n,cp=perms,caps=c), 0, keyring) + + return keyring + + +def test_all(): + + + perms = { + 'good': { + 'broad':[ + ('rwx', 'allow *'), + ('r', 'allow r'), + ('rw', 'allow rw'), + ('x', 'allow x'), + ], + 'service':[ + ('rwx', 'allow service {s} rwx'), + ('r', 'allow service {s} r'), + ('rw', 'allow service {s} rw'), + ('x', 'allow service {s} x'), + ], + 'command':[ + ('rwx', 'allow command "{c}"'), + ], + 'command-with':[ + ('rwx', 'allow command "{c}" with {kv}') + ], + 'command-with-prefix':[ + ('rwx', 'allow command "{c}" with {key} prefix {val}') + ] + }, + 'bad': { + 'broad':[ + ('none', ''), + ], + 'service':[ + ('none1', 'allow service foo rwx'), + ('none2', 'allow service foo r'), + ('none3', 'allow service foo rw'), + ('none4', 'allow service foo x'), + ], + 'command':[ + ('none', 'allow command foo'), + ], + 'command-with':[ + ('none', 'allow command "{c}" with foo=bar'), + ], + 'command-with-prefix':[ + ('none', 'allow command "{c}" with foo prefix bar'), + ], + } + } + + cmds = { + '':[ + { + 'cmd':('status', '', 'r') + }, + { + 'pre':'heap start_profiler', + 'cmd':('heap', 'heapcmd=stats', 'rw'), + 'post':'heap stop_profiler' + } + ], + 'auth':[ + { + 'pre':'', + 'cmd':('auth ls', '', 'r'), + 'post':'' + }, + { + 'pre':'auth get-or-create client.foo mon \'allow *\'', + 'cmd':('auth caps', 'entity="client.foo"', 'rw'), + 'post':'auth del client.foo' + } + ], + 'pg':[ + { + 'cmd':('pg getmap', '', 'r'), + }, + ], + 'mds':[ + { + 'cmd':('mds getmap', '', 'r'), + }, + ], + 'mon':[ + { + 'cmd':('mon getmap', '', 'r') + }, + { + 'cmd':('mon remove', 'name=a', 'rw') + } + ], + 'osd':[ + { + 'cmd':('osd getmap', '', 'r'), + }, + { + 'cmd':('osd pause', '', 'rw'), + 'post':'osd unpause' + }, + { + 'cmd':('osd crush 
dump', '', 'r') + }, + ], + 'config-key':[ + { + 'pre':'config-key set foo bar', + 'cmd':('config-key get', 'key=foo', 'r') + }, + { + 'pre':'config-key set foo bar', + 'cmd':('config-key del', 'key=foo', 'rw') + } + ] + } + + for (module,cmd_lst) in cmds.items(): + k = keyring_base + '.' + module + for cmd in cmd_lst: + + (cmd_cmd, cmd_args, cmd_perm) = cmd['cmd'] + cmd_args_key = '' + cmd_args_val = '' + if len(cmd_args) > 0: + (cmd_args_key, cmd_args_val) = cmd_args.split('=') + + print('generating keyring for {m}/{c}'.format(m=module,c=cmd_cmd)) + # gen keyring + for (good_or_bad,kind_map) in perms.items(): + for (kind,lst) in kind_map.items(): + for (perm, cap) in lst: + cap_formatted = cap.format( + s=module, + c=cmd_cmd, + kv=cmd_args, + key=cmd_args_key, + val=cmd_args_val) + + if len(cap_formatted) == 0: + run_cap = '' + else: + run_cap = 'mon \'{fc}\''.format(fc=cap_formatted) + + cname = 'client.{gb}-{kind}-{p}'.format( + gb=good_or_bad,kind=kind,p=perm) + expect_to_file( + 'ceph auth get-or-create {n} {c}'.format( + n=cname,c=run_cap), 0, k) + # keyring generated + print('testing {m}/{c}'.format(m=module,c=cmd_cmd)) + + # test + for good_bad in perms.keys(): + for (kind,lst) in perms[good_bad].items(): + for (perm,_) in lst: + cname = 'client.{gb}-{k}-{p}'.format(gb=good_bad,k=kind,p=perm) + + if good_bad == 'good': + expect_ret = 0 + else: + expect_ret = errno.EACCES + + if ( cmd_perm not in perm ): + expect_ret = errno.EACCES + if 'with' in kind and len(cmd_args) == 0: + expect_ret = errno.EACCES + if 'service' in kind and len(module) == 0: + expect_ret = errno.EACCES + + if 'pre' in cmd and len(cmd['pre']) > 0: + expect('ceph {0}'.format(cmd['pre']), 0) + expect('ceph -n {cn} -k {k} {c} {arg_val}'.format( + cn=cname,k=k,c=cmd_cmd,arg_val=cmd_args_val), expect_ret) + if 'post' in cmd and len(cmd['post']) > 0: + expect('ceph {0}'.format(cmd['post']), 0) + # finish testing + destroy_keyring(k) + + + return True + + +def test_misc(): + + k = keyring_base + '.misc' + expect_to_file( + 'ceph auth get-or-create client.caps mon \'allow command "auth caps"' \ + ' with entity="client.caps"\'', 0, k) + expect('ceph -n client.caps -k {kf} quorum_status'.format(kf=k), errno.EACCES) + expect('ceph -n client.caps -k {kf} auth caps client.caps mon \'allow *\''.format(kf=k), 0) + expect('ceph -n client.caps -k {kf} quorum_status'.format(kf=k), 0) + destroy_keyring(k) + +def main(): + + test_basic_auth() + test_all() + test_misc() + + print('OK') + + return 0 + +if __name__ == '__main__': + main() diff --git a/qa/workunits/mon/caps.sh b/qa/workunits/mon/caps.sh new file mode 100755 index 000000000..eae5d8665 --- /dev/null +++ b/qa/workunits/mon/caps.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +set -x + +tmp=/tmp/cephtest-mon-caps-madness + +exit_on_error=1 + +[[ ! -z $TEST_EXIT_ON_ERROR ]] && exit_on_error=$TEST_EXIT_ON_ERROR + +if [ `uname` = FreeBSD ]; then + ETIMEDOUT=60 +else + ETIMEDOUT=110 +fi + +expect() +{ + cmd=$1 + expected_ret=$2 + + echo $cmd + eval $cmd >&/dev/null + ret=$? 
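+  # compare the captured status with the caller's expectation; a mismatch is
+  # reported and, unless exit_on_error was disabled via TEST_EXIT_ON_ERROR
+  # above, aborts the whole test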
+ + if [[ $ret -ne $expected_ret ]]; then + echo "Error: Expected return $expected_ret, got $ret" + [[ $exit_on_error -eq 1 ]] && exit 1 + return 1 + fi + + return 0 +} + +expect "ceph auth get-or-create client.bazar > $tmp.bazar.keyring" 0 +expect "ceph -k $tmp.bazar.keyring --user bazar quorum_status" 13 +ceph auth del client.bazar + +c="'allow command \"auth ls\", allow command quorum_status'" +expect "ceph auth get-or-create client.foo mon $c > $tmp.foo.keyring" 0 +expect "ceph -k $tmp.foo.keyring --user foo quorum_status" 0 +expect "ceph -k $tmp.foo.keyring --user foo auth ls" 0 +expect "ceph -k $tmp.foo.keyring --user foo auth export" 13 +expect "ceph -k $tmp.foo.keyring --user foo auth del client.bazar" 13 +expect "ceph -k $tmp.foo.keyring --user foo osd dump" 13 + +# monitor drops the subscribe message from client if it does not have enough caps +# for read from mon. in that case, the client will be waiting for mgrmap in vain, +# if it is instructed to send a command to mgr. "pg dump" is served by mgr. so, +# we need to set a timeout for testing this scenario. +# +# leave plenty of time here because the mons might be thrashing. +export CEPH_ARGS='--rados-mon-op-timeout=300' +expect "ceph -k $tmp.foo.keyring --user foo pg dump" $ETIMEDOUT +export CEPH_ARGS='' + +ceph auth del client.foo +expect "ceph -k $tmp.foo.keyring --user foo quorum_status" 13 + +c="'allow command service with prefix=list, allow command quorum_status'" +expect "ceph auth get-or-create client.bar mon $c > $tmp.bar.keyring" 0 +expect "ceph -k $tmp.bar.keyring --user bar quorum_status" 0 +expect "ceph -k $tmp.bar.keyring --user bar auth ls" 13 +expect "ceph -k $tmp.bar.keyring --user bar auth export" 13 +expect "ceph -k $tmp.bar.keyring --user bar auth del client.foo" 13 +expect "ceph -k $tmp.bar.keyring --user bar osd dump" 13 + +# again, we'll need to timeout. 
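+# (same reasoning as the mon-op-timeout block above: without read caps on the
+# mon the subscribe is dropped, the client never receives a mgrmap, and the
+# mgr-served 'pg dump' can only time out)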
+export CEPH_ARGS='--rados-mon-op-timeout=300' +expect "ceph -k $tmp.bar.keyring --user bar pg dump" $ETIMEDOUT +export CEPH_ARGS='' + +ceph auth del client.bar +expect "ceph -k $tmp.bar.keyring --user bar quorum_status" 13 + +rm $tmp.bazar.keyring $tmp.foo.keyring $tmp.bar.keyring + +# invalid caps health warning +cat <<EOF | ceph auth import -i - +[client.bad] + caps mon = this is wrong + caps osd = does not parse + caps mds = also does not parse +EOF +ceph health | grep AUTH_BAD_CAP +ceph health detail | grep client.bad +ceph auth rm client.bad +expect "ceph auth health | grep AUTH_BAD_CAP" 1 + +echo OK diff --git a/qa/workunits/mon/config.sh b/qa/workunits/mon/config.sh new file mode 100755 index 000000000..1b00201ae --- /dev/null +++ b/qa/workunits/mon/config.sh @@ -0,0 +1,136 @@ +#!/bin/bash -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +ceph config dump + +# value validation +ceph config set mon.a debug_asok 22 +ceph config set mon.a debug_asok 22/33 +ceph config get mon.a debug_asok | grep 22 +ceph config set mon.a debug_asok 1/2 +expect_false ceph config set mon.a debug_asok foo +expect_false ceph config set mon.a debug_asok -10 +ceph config rm mon.a debug_asok + +ceph config set global log_graylog_port 123 +expect_false ceph config set global log_graylog_port asdf +ceph config rm global log_graylog_port + +ceph config set mon mon_cluster_log_to_stderr true +ceph config get mon.a mon_cluster_log_to_stderr | grep true +ceph config set mon mon_cluster_log_to_stderr 2 +ceph config get mon.a mon_cluster_log_to_stderr | grep true +ceph config set mon mon_cluster_log_to_stderr 1 +ceph config get mon.a mon_cluster_log_to_stderr | grep true +ceph config set mon mon_cluster_log_to_stderr false +ceph config get mon.a mon_cluster_log_to_stderr | grep false +ceph config set mon mon_cluster_log_to_stderr 0 +ceph config get mon.a mon_cluster_log_to_stderr | grep false +expect_false ceph config set mon mon_cluster_log_to_stderr fiddle +expect_false ceph config set mon mon_cluster_log_to_stderr '' +ceph config rm mon mon_cluster_log_to_stderr + +expect_false ceph config set mon.a osd_pool_default_type foo +ceph config set mon.a osd_pool_default_type replicated +ceph config rm mon.a osd_pool_default_type + +# scoping +ceph config set global debug_asok 33 +ceph config get mon.a debug_asok | grep 33 +ceph config set mon debug_asok 11 +ceph config get mon.a debug_asok | grep 11 +ceph config set mon.a debug_asok 22 +ceph config get mon.a debug_asok | grep 22 +ceph config rm mon.a debug_asok +ceph config get mon.a debug_asok | grep 11 +ceph config rm mon debug_asok +ceph config get mon.a debug_asok | grep 33 +# nested .-prefix scoping +ceph config set client.foo debug_asok 44 +ceph config get client.foo.bar debug_asok | grep 44 +ceph config get client.foo.bar.baz debug_asok | grep 44 +ceph config set client.foo.bar debug_asok 55 +ceph config get client.foo.bar.baz debug_asok | grep 55 +ceph config rm client.foo debug_asok +ceph config get client.foo.bar.baz debug_asok | grep 55 +ceph config rm client.foo.bar debug_asok +ceph config get client.foo.bar.baz debug_asok | grep 33 +ceph config rm global debug_asok + +# whitespace keys +ceph config set client.foo 'debug asok' 44 +ceph config get client.foo 'debug asok' | grep 44 +ceph config set client.foo debug_asok 55 +ceph config get client.foo 'debug asok' | grep 55 +ceph config set client.foo 'debug asok' 66 +ceph config get client.foo debug_asok | grep 66 +ceph config rm client.foo debug_asok +ceph config 
set client.foo debug_asok 66 +ceph config rm client.foo 'debug asok' + +# help +ceph config help debug_asok | grep debug_asok + +# show +ceph config set osd.0 debug_asok 33 +while ! ceph config show osd.0 | grep debug_asok | grep 33 | grep mon +do + sleep 1 +done +ceph config set osd.0 debug_asok 22 +while ! ceph config show osd.0 | grep debug_asok | grep 22 | grep mon +do + sleep 1 +done + +ceph tell osd.0 config set debug_asok 99 +while ! ceph config show osd.0 | grep debug_asok | grep 99 +do + sleep 1 +done +ceph config show osd.0 | grep debug_asok | grep 'override mon' +ceph tell osd.0 config unset debug_asok +ceph tell osd.0 config unset debug_asok + +ceph config rm osd.0 debug_asok +while ceph config show osd.0 | grep debug_asok | grep mon +do + sleep 1 +done +ceph config show osd.0 | grep -c debug_asok | grep 0 + +ceph config set osd.0 osd_scrub_cost 123 +while ! ceph config show osd.0 | grep osd_scrub_cost | grep mon +do + sleep 1 +done +ceph config rm osd.0 osd_scrub_cost + +# show-with-defaults +ceph config show-with-defaults osd.0 | grep debug_asok + +# assimilate +t1=`mktemp` +t2=`mktemp` +cat <<EOF > $t1 +[osd.0] +keyring = foo +debug_asok = 66 +EOF +ceph config assimilate-conf -i $t1 | tee $t2 + +grep keyring $t2 +expect_false grep debug_asok $t2 +rm -f $t1 $t2 + +expect_false ceph config reset +expect_false ceph config reset -1 +# we are at end of testing, so it's okay to revert everything +ceph config reset 0 + +echo OK diff --git a/qa/workunits/mon/crush_ops.sh b/qa/workunits/mon/crush_ops.sh new file mode 100755 index 000000000..a68761985 --- /dev/null +++ b/qa/workunits/mon/crush_ops.sh @@ -0,0 +1,237 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +ceph osd crush dump + +# rules +ceph osd crush rule dump +ceph osd crush rule ls +ceph osd crush rule list + +ceph osd crush rule create-simple foo default host +ceph osd crush rule create-simple foo default host +ceph osd crush rule create-simple bar default host + +ceph osd crush rm-device-class all +ceph osd crush set-device-class ssd osd.0 +ceph osd crush set-device-class hdd osd.1 +ceph osd crush rule create-replicated foo-ssd default host ssd +ceph osd crush rule create-replicated foo-hdd default host hdd +ceph osd crush rule ls-by-class ssd | grep 'foo-ssd' +ceph osd crush rule ls-by-class ssd | expect_false grep 'foo-hdd' +ceph osd crush rule ls-by-class hdd | grep 'foo-hdd' +ceph osd crush rule ls-by-class hdd | expect_false grep 'foo-ssd' + +ceph osd erasure-code-profile set ec-foo-ssd crush-device-class=ssd m=2 k=2 +ceph osd pool create ec-foo 2 erasure ec-foo-ssd +ceph osd pool rm ec-foo ec-foo --yes-i-really-really-mean-it + +ceph osd crush rule ls | grep foo + +ceph osd crush rule rename foo foo-asdf +ceph osd crush rule rename foo foo-asdf # idempotent +ceph osd crush rule rename bar bar-asdf +ceph osd crush rule ls | grep 'foo-asdf' +ceph osd crush rule ls | grep 'bar-asdf' +ceph osd crush rule rm foo 2>&1 | grep 'does not exist' +ceph osd crush rule rm bar 2>&1 | grep 'does not exist' +ceph osd crush rule rename foo-asdf foo +ceph osd crush rule rename foo-asdf foo # idempotent +ceph osd crush rule rename bar-asdf bar +ceph osd crush rule ls | expect_false grep 'foo-asdf' +ceph osd crush rule ls | expect_false grep 'bar-asdf' +ceph osd crush rule rm foo +ceph osd crush rule rm foo # idempotent +ceph osd crush rule rm bar + +# can't delete in-use rules, tho: +ceph osd pool create pinning_pool 1 +expect_false ceph osd crush rule rm 
replicated_rule +ceph osd pool rm pinning_pool pinning_pool --yes-i-really-really-mean-it + +# build a simple map +expect_false ceph osd crush add-bucket foo osd +ceph osd crush add-bucket foo root +o1=`ceph osd create` +o2=`ceph osd create` +ceph osd crush add $o1 1 host=host1 root=foo +ceph osd crush add $o1 1 host=host1 root=foo # idempotent +ceph osd crush add $o2 1 host=host2 root=foo +ceph osd crush add $o2 1 host=host2 root=foo # idempotent +ceph osd crush add-bucket bar root +ceph osd crush add-bucket bar root # idempotent +ceph osd crush link host1 root=bar +ceph osd crush link host1 root=bar # idempotent +ceph osd crush link host2 root=bar +ceph osd crush link host2 root=bar # idempotent + +ceph osd tree | grep -c osd.$o1 | grep -q 2 +ceph osd tree | grep -c host1 | grep -q 2 +ceph osd tree | grep -c osd.$o2 | grep -q 2 +ceph osd tree | grep -c host2 | grep -q 2 +expect_false ceph osd crush rm host1 foo # not empty +ceph osd crush unlink host1 foo +ceph osd crush unlink host1 foo +ceph osd tree | grep -c host1 | grep -q 1 + +expect_false ceph osd crush rm foo # not empty +expect_false ceph osd crush rm bar # not empty +ceph osd crush unlink host1 bar +ceph osd tree | grep -c host1 | grep -q 1 # now an orphan +ceph osd crush rm osd.$o1 host1 +ceph osd crush rm host1 +ceph osd tree | grep -c host1 | grep -q 0 +expect_false ceph osd tree-from host1 +ceph osd tree-from host2 +expect_false ceph osd tree-from osd.$o2 + +expect_false ceph osd crush rm bar # not empty +ceph osd crush unlink host2 + +ceph osd crush add-bucket host-for-test host root=root-for-test rack=rack-for-test +ceph osd tree | grep host-for-test +ceph osd tree | grep rack-for-test +ceph osd tree | grep root-for-test +ceph osd crush rm host-for-test +ceph osd crush rm rack-for-test +ceph osd crush rm root-for-test + +# reference foo and bar with a rule +ceph osd crush rule create-simple foo-rule foo host firstn +expect_false ceph osd crush rm foo +ceph osd crush rule rm foo-rule + +ceph osd crush rm bar +ceph osd crush rm foo +ceph osd crush rm osd.$o2 host2 +ceph osd crush rm host2 + +ceph osd crush add-bucket foo host +ceph osd crush move foo root=default rack=localrack + +ceph osd crush create-or-move osd.$o1 1.0 root=default +ceph osd crush move osd.$o1 host=foo +ceph osd find osd.$o1 | grep host | grep foo + +ceph osd crush rm osd.$o1 +ceph osd crush rm osd.$o2 + +ceph osd crush rm foo + +# test reweight +o3=`ceph osd create` +ceph osd crush add $o3 123 root=default +ceph osd tree | grep osd.$o3 | grep 123 +ceph osd crush reweight osd.$o3 113 +expect_false ceph osd crush reweight osd.$o3 123456 +ceph osd tree | grep osd.$o3 | grep 113 +ceph osd crush rm osd.$o3 +ceph osd rm osd.$o3 + +# test reweight-subtree +o4=`ceph osd create` +o5=`ceph osd create` +ceph osd crush add $o4 123 root=default host=foobaz +ceph osd crush add $o5 123 root=default host=foobaz +ceph osd tree | grep osd.$o4 | grep 123 +ceph osd tree | grep osd.$o5 | grep 123 +ceph osd crush reweight-subtree foobaz 155 +expect_false ceph osd crush reweight-subtree foobaz 123456 +ceph osd tree | grep osd.$o4 | grep 155 +ceph osd tree | grep osd.$o5 | grep 155 +ceph osd crush rm osd.$o4 +ceph osd crush rm osd.$o5 +ceph osd rm osd.$o4 +ceph osd rm osd.$o5 + +# weight sets +# make sure we require luminous before testing weight-sets +ceph osd set-require-min-compat-client luminous +ceph osd crush weight-set dump +ceph osd crush weight-set ls +expect_false ceph osd crush weight-set reweight fooset osd.0 .9 +ceph osd pool create fooset 8 +ceph osd pool create 
barset 8 +ceph osd pool set barset size 3 +expect_false ceph osd crush weight-set reweight fooset osd.0 .9 +ceph osd crush weight-set create fooset flat +ceph osd crush weight-set create barset positional +ceph osd crush weight-set ls | grep fooset +ceph osd crush weight-set ls | grep barset +ceph osd crush weight-set dump +ceph osd crush weight-set reweight fooset osd.0 .9 +expect_false ceph osd crush weight-set reweight fooset osd.0 .9 .9 +expect_false ceph osd crush weight-set reweight barset osd.0 .9 +ceph osd crush weight-set reweight barset osd.0 .9 .9 .9 +ceph osd crush weight-set ls | grep -c fooset | grep -q 1 +ceph osd crush weight-set rm fooset +ceph osd crush weight-set ls | grep -c fooset | grep -q 0 +ceph osd crush weight-set ls | grep barset +ceph osd crush weight-set rm barset +ceph osd crush weight-set ls | grep -c barset | grep -q 0 +ceph osd crush weight-set create-compat +ceph osd crush weight-set ls | grep '(compat)' +ceph osd crush weight-set rm-compat + +# weight set vs device classes +ceph osd pool create cool 2 +ceph osd pool create cold 2 +ceph osd pool set cold size 2 +ceph osd crush weight-set create-compat +ceph osd crush weight-set create cool flat +ceph osd crush weight-set create cold positional +ceph osd crush rm-device-class osd.0 +ceph osd crush weight-set reweight-compat osd.0 10.5 +ceph osd crush weight-set reweight cool osd.0 11.5 +ceph osd crush weight-set reweight cold osd.0 12.5 12.4 +ceph osd crush set-device-class fish osd.0 +ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 10\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 11\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 12\\. +ceph osd crush rm-device-class osd.0 +ceph osd crush set-device-class globster osd.0 +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 10\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 11\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 12\\. +ceph osd crush weight-set reweight-compat osd.0 7.5 +ceph osd crush weight-set reweight cool osd.0 8.5 +ceph osd crush weight-set reweight cold osd.0 6.5 6.6 +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 7\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 8\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 6\\. 
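+# The --show-shadow greps above check the device-class shadow tree: the
+# compat, flat (cool) and positional (cold) weight-set values given to osd.0
+# are expected to survive the class changes (fish, then globster), and the
+# follow-up reweights (7.5 / 8.5 / 6.5 6.6) should show up there as well.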
+ceph osd crush rm-device-class osd.0 +ceph osd pool rm cool cool --yes-i-really-really-mean-it +ceph osd pool rm cold cold --yes-i-really-really-mean-it +ceph osd crush weight-set rm-compat + +# weight set vs device classes vs move +ceph osd crush weight-set create-compat +ceph osd crush add-bucket fooo host +ceph osd crush move fooo root=default +ceph osd crush add-bucket barr rack +ceph osd crush move barr root=default +ceph osd crush move fooo rack=barr +ceph osd crush rm fooo +ceph osd crush rm barr +ceph osd crush weight-set rm-compat + +# this sequence would crash at one point +ceph osd crush weight-set create-compat +ceph osd crush add-bucket r1 rack root=default +for f in `seq 1 32`; do + ceph osd crush add-bucket h$f host rack=r1 +done +for f in `seq 1 32`; do + ceph osd crush rm h$f +done +ceph osd crush rm r1 +ceph osd crush weight-set rm-compat + +echo OK diff --git a/qa/workunits/mon/osd.sh b/qa/workunits/mon/osd.sh new file mode 100755 index 000000000..535d6c137 --- /dev/null +++ b/qa/workunits/mon/osd.sh @@ -0,0 +1,24 @@ +#!/bin/sh -x + +set -e + +ua=`uuidgen` +ub=`uuidgen` + +# should get same id with same uuid +na=`ceph osd create $ua` +test $na -eq `ceph osd create $ua` + +nb=`ceph osd create $ub` +test $nb -eq `ceph osd create $ub` +test $nb -ne $na + +ceph osd rm $na +ceph osd rm $na +ceph osd rm $nb +ceph osd rm 1000 + +na2=`ceph osd create $ua` + +echo OK + diff --git a/qa/workunits/mon/pg_autoscaler.sh b/qa/workunits/mon/pg_autoscaler.sh new file mode 100755 index 000000000..4cf71a31c --- /dev/null +++ b/qa/workunits/mon/pg_autoscaler.sh @@ -0,0 +1,156 @@ +#!/bin/bash -ex + +NUM_OSDS=$(ceph osd ls | wc -l) +if [ $NUM_OSDS -lt 6 ]; then + echo "test requires at least 6 OSDs" + exit 1 +fi + +NUM_POOLS=$(ceph osd pool ls | wc -l) +if [ $NUM_POOLS -gt 0 ]; then + echo "test requires no preexisting pools" + exit 1 +fi + +function wait_for() { + local sec=$1 + local cmd=$2 + + while true ; do + if bash -c "$cmd" ; then + break + fi + sec=$(( $sec - 1 )) + if [ $sec -eq 0 ]; then + echo failed + return 1 + fi + sleep 1 + done + return 0 +} + +function power2() { echo "x=l($1)/l(2); scale=0; 2^((x+0.5)/1)" | bc -l;} + +function eval_actual_expected_val() { + local actual_value=$1 + local expected_value=$2 + if [[ $actual_value = $expected_value ]] + then + echo "Success: " $actual_value "=" $expected_value + else + echo "Error: " $actual_value "!=" $expected_value + exit 1 + fi +} + +# enable +ceph config set mgr mgr/pg_autoscaler/sleep_interval 60 +ceph mgr module enable pg_autoscaler +# ceph config set global osd_pool_default_pg_autoscale_mode on + +# pg_num_min +ceph osd pool create meta0 16 +ceph osd pool create bulk0 16 --bulk +ceph osd pool create bulk1 16 --bulk +ceph osd pool create bulk2 16 --bulk +ceph osd pool set meta0 pg_autoscale_mode on +ceph osd pool set bulk0 pg_autoscale_mode on +ceph osd pool set bulk1 pg_autoscale_mode on +ceph osd pool set bulk2 pg_autoscale_mode on +# set pool size +ceph osd pool set meta0 size 2 +ceph osd pool set bulk0 size 2 +ceph osd pool set bulk1 size 2 +ceph osd pool set bulk2 size 2 + +# get num pools again since we created more pools +NUM_POOLS=$(ceph osd pool ls | wc -l) + +# get bulk flag of each pool through the command ceph osd pool autoscale-status +BULK_FLAG_1=$(ceph osd pool autoscale-status | grep 'meta0' | grep -o -m 1 'True\|False' || true) +BULK_FLAG_2=$(ceph osd pool autoscale-status | grep 'bulk0' | grep -o -m 1 'True\|False' || true) +BULK_FLAG_3=$(ceph osd pool autoscale-status | grep 'bulk1' | grep -o -m 1 
'True\|False' || true) +BULK_FLAG_4=$(ceph osd pool autoscale-status | grep 'bulk2' | grep -o -m 1 'True\|False' || true) + +# evaluate the accuracy of ceph osd pool autoscale-status, specifically the `BULK` column + +eval_actual_expected_val $BULK_FLAG_1 'False' +eval_actual_expected_val $BULK_FLAG_2 'True' +eval_actual_expected_val $BULK_FLAG_3 'True' +eval_actual_expected_val $BULK_FLAG_4 'True' + +# The rest of this test evaluates the accuracy of the autoscaler + +# get pool size +POOL_SIZE_1=$(ceph osd pool get meta0 size| grep -Eo '[0-9]{1,4}') +POOL_SIZE_2=$(ceph osd pool get bulk0 size| grep -Eo '[0-9]{1,4}') +POOL_SIZE_3=$(ceph osd pool get bulk1 size| grep -Eo '[0-9]{1,4}') +POOL_SIZE_4=$(ceph osd pool get bulk2 size| grep -Eo '[0-9]{1,4}') + +# Calculate the target pg_num of each pool +# The first pool is non-bulk, so handle it first. +# Since its capacity ratio is 0, the meta pool keeps its current pg_num + +TARGET_PG_1=$(ceph osd pool get meta0 pg_num| grep -Eo '[0-9]{1,4}') +PG_LEFT=$NUM_OSDS*100 +NUM_POOLS_LEFT=$NUM_POOLS-1 +# The remaining pools are bulk pools of equal size, so the +# calculation is straightforward. +TARGET_PG_2=$(power2 $((($PG_LEFT)/($NUM_POOLS_LEFT)/($POOL_SIZE_2)))) +TARGET_PG_3=$(power2 $((($PG_LEFT)/($NUM_POOLS_LEFT)/($POOL_SIZE_3)))) +TARGET_PG_4=$(power2 $((($PG_LEFT)/($NUM_POOLS_LEFT)/($POOL_SIZE_4)))) + +# evaluate target_pg against the pg_num of each pool +wait_for 300 "ceph osd pool get meta0 pg_num | grep $TARGET_PG_1" +wait_for 300 "ceph osd pool get bulk0 pg_num | grep $TARGET_PG_2" +wait_for 300 "ceph osd pool get bulk1 pg_num | grep $TARGET_PG_3" +wait_for 300 "ceph osd pool get bulk2 pg_num | grep $TARGET_PG_4" + +# target ratio +ceph osd pool set meta0 target_size_ratio 5 +ceph osd pool set bulk0 target_size_ratio 1 +sleep 60 +APGS=$(ceph osd dump -f json-pretty | jq '.pools[0].pg_num_target') +BPGS=$(ceph osd dump -f json-pretty | jq '.pools[1].pg_num_target') +test $APGS -gt 100 +test $BPGS -gt 10 + +# small ratio change does not change pg_num +ceph osd pool set meta0 target_size_ratio 7 +ceph osd pool set bulk0 target_size_ratio 2 +sleep 60 +APGS2=$(ceph osd dump -f json-pretty | jq '.pools[0].pg_num_target') +BPGS2=$(ceph osd dump -f json-pretty | jq '.pools[1].pg_num_target') +test $APGS -eq $APGS2 +test $BPGS -eq $BPGS2 + +# target_size +ceph osd pool set meta0 target_size_bytes 1000000000000000 +ceph osd pool set bulk0 target_size_bytes 1000000000000000 +ceph osd pool set meta0 target_size_ratio 0 +ceph osd pool set bulk0 target_size_ratio 0 +wait_for 60 "ceph health detail | grep POOL_TARGET_SIZE_BYTES_OVERCOMMITTED" + +ceph osd pool set meta0 target_size_bytes 1000 +ceph osd pool set bulk0 target_size_bytes 1000 +ceph osd pool set meta0 target_size_ratio 1 +wait_for 60 "ceph health detail | grep POOL_HAS_TARGET_SIZE_BYTES_AND_RATIO" + +# test autoscale warn + +ceph osd pool create warn0 1 --autoscale-mode=warn +wait_for 120 "ceph health detail | grep POOL_TOO_FEW_PGS" + +ceph osd pool create warn1 256 --autoscale-mode=warn +wait_for 120 "ceph health detail | grep POOL_TOO_MANY_PGS" + +ceph osd pool rm meta0 meta0 --yes-i-really-really-mean-it +ceph osd pool rm bulk0 bulk0 --yes-i-really-really-mean-it +ceph osd pool rm bulk1 bulk1 --yes-i-really-really-mean-it +ceph osd pool rm bulk2 bulk2 --yes-i-really-really-mean-it +ceph osd pool rm warn0 warn0 --yes-i-really-really-mean-it +ceph osd pool rm warn1 warn1 --yes-i-really-really-mean-it + +echo OK + diff --git a/qa/workunits/mon/pg_autoscaler.sh b/qa/workunits/mon/ping.py new file mode 100755 index 
000000000..1f6d0a1dd --- /dev/null +++ b/qa/workunits/mon/ping.py @@ -0,0 +1,106 @@ +#!/usr/bin/python3 + +import json +import shlex +import subprocess + + +class UnexpectedReturn(Exception): + def __init__(self, cmd, ret, expected, msg): + if isinstance(cmd, list): + self.cmd = ' '.join(cmd) + else: + assert isinstance(cmd, str), \ + 'cmd needs to be either a list or a str' + self.cmd = cmd + self.cmd = str(self.cmd) + self.ret = int(ret) + self.expected = int(expected) + self.msg = str(msg) + + def __str__(self): + return repr('{c}: expected return {e}, got {r} ({o})'.format( + c=self.cmd, e=self.expected, r=self.ret, o=self.msg)) + + +def call(cmd): + if isinstance(cmd, list): + args = cmd + elif isinstance(cmd, str): + args = shlex.split(cmd) + else: + assert False, 'cmd is not a string/unicode nor a list!' + + print('call: {0}'.format(args)) + proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + procout, procerr = proc.communicate(None) + + return proc.returncode, procout, procerr + + +def expect(cmd, expected_ret): + try: + (r, out, err) = call(cmd) + except ValueError as e: + assert False, \ + 'unable to run {c}: {err}'.format(c=repr(cmd), err=str(e)) + + if r != expected_ret: + raise UnexpectedReturn(repr(cmd), r, expected_ret, err) + + return out.decode() if isinstance(out, bytes) else out + + +def get_quorum_status(timeout=300): + cmd = 'ceph quorum_status' + if timeout > 0: + cmd += ' --connect-timeout {0}'.format(timeout) + + out = expect(cmd, 0) + j = json.loads(out) + return j + + +def main(): + quorum_status = get_quorum_status() + mon_names = [mon['name'] for mon in quorum_status['monmap']['mons']] + + print('ping all monitors') + for m in mon_names: + print('ping mon.{0}'.format(m)) + out = expect('ceph ping mon.{0}'.format(m), 0) + reply = json.loads(out) + + assert reply['mon_status']['name'] == m, \ + 'reply obtained from mon.{0}, expected mon.{1}'.format( + reply['mon_status']['name'], m) + + print('test out-of-quorum reply') + for m in mon_names: + print('testing mon.{0}'.format(m)) + expect('ceph daemon mon.{0} quorum exit'.format(m), 0) + + quorum_status = get_quorum_status() + assert m not in quorum_status['quorum_names'], \ + 'mon.{0} was not supposed to be in quorum ({1})'.format( + m, quorum_status['quorum_names']) + + out = expect('ceph ping mon.{0}'.format(m), 0) + reply = json.loads(out) + mon_status = reply['mon_status'] + + assert mon_status['name'] == m, \ + 'reply obtained from mon.{0}, expected mon.{1}'.format( + mon_status['name'], m) + + assert mon_status['state'] == 'electing', \ + 'mon.{0} is in state {1}, expected electing'.format( + m, mon_status['state']) + + expect('ceph daemon mon.{0} quorum enter'.format(m), 0) + + print('OK') + + +if __name__ == '__main__': + main() diff --git a/qa/workunits/mon/pool_ops.sh b/qa/workunits/mon/pool_ops.sh new file mode 100755 index 000000000..23bb3c0be --- /dev/null +++ b/qa/workunits/mon/pool_ops.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function get_config_value_or_die() +{ + local pool_name config_opt raw val + + pool_name=$1 + config_opt=$2 + + raw="`$SUDO ceph osd pool get $pool_name $config_opt 2>/dev/null`" + if [[ $? 
-ne 0 ]]; then + echo "error obtaining config opt '$config_opt' from '$pool_name': $raw" + exit 1 + fi + + raw=`echo $raw | sed -e 's/[{} "]//g'` + val=`echo $raw | cut -f2 -d:` + + echo "$val" + return 0 +} + +function expect_config_value() +{ + local pool_name config_opt expected_val val + pool_name=$1 + config_opt=$2 + expected_val=$3 + + val=$(get_config_value_or_die $pool_name $config_opt) + + if [[ "$val" != "$expected_val" ]]; then + echo "expected '$expected_val', got '$val'" + exit 1 + fi +} + +# pg_num min/max +TEST_POOL=testpool1234 +ceph osd pool create testpool1234 8 --autoscale-mode off +ceph osd pool set $TEST_POOL pg_num_min 2 +ceph osd pool get $TEST_POOL pg_num_min | grep 2 +ceph osd pool set $TEST_POOL pg_num_max 33 +ceph osd pool get $TEST_POOL pg_num_max | grep 33 +expect_false ceph osd pool set $TEST_POOL pg_num_min 9 +expect_false ceph osd pool set $TEST_POOL pg_num_max 7 +expect_false ceph osd pool set $TEST_POOL pg_num 1 +expect_false ceph osd pool set $TEST_POOL pg_num 44 +ceph osd pool set $TEST_POOL pg_num_min 0 +expect_false ceph osd pool get $TEST_POOL pg_num_min +ceph osd pool set $TEST_POOL pg_num_max 0 +expect_false ceph osd pool get $TEST_POOL pg_num_max +ceph osd pool delete $TEST_POOL $TEST_POOL --yes-i-really-really-mean-it + +# note: we need to pass the other args or ceph_argparse.py will take +# 'invalid' that is not replicated|erasure and assume it is the next +# argument, which is a string. +expect_false ceph osd pool create foo 123 123 invalid foo-profile foo-rule + +ceph osd pool create foo 123 123 replicated +ceph osd pool create fooo 123 123 erasure default +ceph osd pool create foooo 123 + +ceph osd pool create foo 123 # idempotent + +ceph osd pool set foo size 1 --yes-i-really-mean-it +expect_config_value "foo" "min_size" 1 +ceph osd pool set foo size 4 +expect_config_value "foo" "min_size" 2 +ceph osd pool set foo size 10 +expect_config_value "foo" "min_size" 5 +expect_false ceph osd pool set foo size 0 +expect_false ceph osd pool set foo size 20 + +# should fail due to safety interlock +expect_false ceph osd pool delete foo +expect_false ceph osd pool delete foo foo +expect_false ceph osd pool delete foo foo --force +expect_false ceph osd pool delete foo fooo --yes-i-really-mean-it +expect_false ceph osd pool delete foo --yes-i-really-mean-it foo + +ceph osd pool delete foooo foooo --yes-i-really-really-mean-it +ceph osd pool delete fooo fooo --yes-i-really-really-mean-it +ceph osd pool delete foo foo --yes-i-really-really-mean-it + +# idempotent +ceph osd pool delete foo foo --yes-i-really-really-mean-it +ceph osd pool delete fooo fooo --yes-i-really-really-mean-it +ceph osd pool delete fooo fooo --yes-i-really-really-mean-it + +# non-existent pool +ceph osd pool delete fuggg fuggg --yes-i-really-really-mean-it + +echo OK + + diff --git a/qa/workunits/mon/rbd_snaps_ops.sh b/qa/workunits/mon/rbd_snaps_ops.sh new file mode 100755 index 000000000..eb88565ea --- /dev/null +++ b/qa/workunits/mon/rbd_snaps_ops.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +# attempt to trigger #6047 + + +cmd_no=0 +expect() +{ + cmd_no=$(($cmd_no+1)) + cmd="$1" + expected=$2 + echo "[$cmd_no] $cmd" + eval $cmd + ret=$? 
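+ # $cmd is run through eval so that quoting inside the command string is
+ # honored; its exit status is captured in $ret and compared against the
+ # caller-supplied expected value below.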
+ if [[ $ret -ne $expected ]]; then + echo "[$cmd_no] unexpected return '$ret', expected '$expected'" + exit 1 + fi +} + +ceph osd pool delete test test --yes-i-really-really-mean-it || true +expect 'ceph osd pool create test 8 8' 0 +expect 'ceph osd pool application enable test rbd' +expect 'ceph osd pool mksnap test snapshot' 0 +expect 'ceph osd pool rmsnap test snapshot' 0 + +expect 'rbd --pool=test --rbd_validate_pool=false create --size=102400 image' 0 +expect 'rbd --pool=test snap create image@snapshot' 22 + +expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0 +expect 'ceph osd pool create test 8 8' 0 +expect 'rbd --pool=test pool init' 0 +expect 'rbd --pool=test create --size=102400 image' 0 +expect 'rbd --pool=test snap create image@snapshot' 0 +expect 'rbd --pool=test snap ls image' 0 +expect 'rbd --pool=test snap rm image@snapshot' 0 + +expect 'ceph osd pool mksnap test snapshot' 22 + +expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0 + +# reproduce 7210 and expect it to be fixed +# basically create such a scenario where we end up deleting what used to +# be an unmanaged snapshot from a not-unmanaged pool + +ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it || true +expect 'ceph osd pool create test-foo 8' 0 +expect 'ceph osd pool application enable test-foo rbd' +expect 'rbd --pool test-foo create --size 1024 image' 0 +expect 'rbd --pool test-foo snap create image@snapshot' 0 + +ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it || true +expect 'ceph osd pool create test-bar 8' 0 +expect 'ceph osd pool application enable test-bar rbd' +expect 'rados cppool test-foo test-bar --yes-i-really-mean-it' 0 +expect 'rbd --pool test-bar snap rm image@snapshot' 95 +expect 'ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it' 0 +expect 'ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it' 0 + + +echo OK diff --git a/qa/workunits/mon/test_config_key_caps.sh b/qa/workunits/mon/test_config_key_caps.sh new file mode 100755 index 000000000..77b4b53b7 --- /dev/null +++ b/qa/workunits/mon/test_config_key_caps.sh @@ -0,0 +1,201 @@ +#!/usr/bin/env bash + +set -x +set -e + +tmp=$(mktemp -d -p /tmp test_mon_config_key_caps.XXXXX) +entities=() + +function cleanup() +{ + set +e + set +x + if [[ -e $tmp/keyring ]] && [[ -e $tmp/keyring.orig ]]; then + grep '\[.*\..*\]' $tmp/keyring.orig > $tmp/entities.orig + for e in $(grep '\[.*\..*\]' $tmp/keyring | \ + diff $tmp/entities.orig - | \ + sed -n 's/^.*\[\(.*\..*\)\]/\1/p'); + do + ceph auth rm $e 2>&1 >& /dev/null + done + fi + #rm -fr $tmp +} + +trap cleanup 0 # cleanup on exit + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +# for cleanup purposes +ceph auth export -o $tmp/keyring.orig + +k=$tmp/keyring + +# setup a few keys +ceph config-key ls +ceph config-key set daemon-private/osd.123/test-foo +ceph config-key set mgr/test-foo +ceph config-key set device/test-foo +ceph config-key set test/foo + +allow_aa=client.allow_aa +allow_bb=client.allow_bb +allow_cc=client.allow_cc + +mgr_a=mgr.a +mgr_b=mgr.b +osd_a=osd.100 +osd_b=osd.200 + +prefix_aa=client.prefix_aa +prefix_bb=client.prefix_bb +prefix_cc=client.prefix_cc +match_aa=client.match_aa +match_bb=client.match_bb + +fail_aa=client.fail_aa +fail_bb=client.fail_bb +fail_cc=client.fail_cc +fail_dd=client.fail_dd +fail_ee=client.fail_ee +fail_ff=client.fail_ff +fail_gg=client.fail_gg +fail_writes=client.fail_writes + +ceph auth get-or-create $allow_aa 
mon 'allow *' +ceph auth get-or-create $allow_bb mon 'allow service config-key rwx' +ceph auth get-or-create $allow_cc mon 'allow command "config-key get"' + +ceph auth get-or-create $mgr_a mon 'allow profile mgr' +ceph auth get-or-create $mgr_b mon 'allow profile mgr' +ceph auth get-or-create $osd_a mon 'allow profile osd' +ceph auth get-or-create $osd_b mon 'allow profile osd' + +ceph auth get-or-create $prefix_aa mon \ + "allow command \"config-key get\" with key prefix client/$prefix_aa" + +cap="allow command \"config-key set\" with key prefix client/" +cap="$cap,allow command \"config-key get\" with key prefix client/$prefix_bb" +ceph auth get-or-create $prefix_bb mon "$cap" + +cap="allow command \"config-key get\" with key prefix client/" +cap="$cap, allow command \"config-key set\" with key prefix client/" +cap="$cap, allow command \"config-key ls\"" +ceph auth get-or-create $prefix_cc mon "$cap" + +cap="allow command \"config-key get\" with key=client/$match_aa/foo" +ceph auth get-or-create $match_aa mon "$cap" +cap="allow command \"config-key get\" with key=client/$match_bb/foo" +cap="$cap,allow command \"config-key set\" with key=client/$match_bb/foo" +ceph auth get-or-create $match_bb mon "$cap" + +ceph auth get-or-create $fail_aa mon 'allow rx' +ceph auth get-or-create $fail_bb mon 'allow r,allow w' +ceph auth get-or-create $fail_cc mon 'allow rw' +ceph auth get-or-create $fail_dd mon 'allow rwx' +ceph auth get-or-create $fail_ee mon 'allow profile bootstrap-rgw' +ceph auth get-or-create $fail_ff mon 'allow profile bootstrap-rbd' +# write commands will require rw; wx is not enough +ceph auth get-or-create $fail_gg mon 'allow service config-key wx' +# read commands will only require 'r'; 'rx' should be enough. +ceph auth get-or-create $fail_writes mon 'allow service config-key rx' + +# grab keyring +ceph auth export -o $k + +# keys will all the caps can do whatever +for c in $allow_aa $allow_bb $allow_cc $mgr_a $mgr_b; do + ceph -k $k --name $c config-key get daemon-private/osd.123/test-foo + ceph -k $k --name $c config-key get mgr/test-foo + ceph -k $k --name $c config-key get device/test-foo + ceph -k $k --name $c config-key get test/foo +done + +for c in $osd_a $osd_b; do + ceph -k $k --name $c config-key put daemon-private/$c/test-foo + ceph -k $k --name $c config-key get daemon-private/$c/test-foo + expect_false ceph -k $k --name $c config-key ls + expect_false ceph -k $k --name $c config-key get mgr/test-foo + expect_false ceph -k $k --name $c config-key get device/test-foo + expect_false ceph -k $k --name $c config-key get test/foo +done + +expect_false ceph -k $k --name $osd_a get daemon-private/$osd_b/test-foo +expect_false ceph -k $k --name $osd_b get daemon-private/$osd_a/test-foo + +expect_false ceph -k $k --name $prefix_aa \ + config-key ls +expect_false ceph -k $k --name $prefix_aa \ + config-key get daemon-private/osd.123/test-foo +expect_false ceph -k $k --name $prefix_aa \ + config-key set test/bar +expect_false ceph -k $k --name $prefix_aa \ + config-key set client/$prefix_aa/foo + +# write something so we can read, use a custom entity +ceph -k $k --name $allow_bb config-key set client/$prefix_aa/foo +ceph -k $k --name $prefix_aa config-key get client/$prefix_aa/foo +# check one writes to the other's prefix, the other is able to read +ceph -k $k --name $prefix_bb config-key set client/$prefix_aa/bar +ceph -k $k --name $prefix_aa config-key get client/$prefix_aa/bar + +ceph -k $k --name $prefix_bb config-key set client/$prefix_bb/foo +ceph -k $k --name 
$prefix_bb config-key get client/$prefix_bb/foo + +expect_false ceph -k $k --name $prefix_bb config-key get client/$prefix_aa/bar +expect_false ceph -k $k --name $prefix_bb config-key ls +expect_false ceph -k $k --name $prefix_bb \ + config-key get daemon-private/osd.123/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get mgr/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get device/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get test/bar +expect_false ceph -k $k --name $prefix_bb config-key set test/bar + +ceph -k $k --name $prefix_cc config-key set client/$match_aa/foo +ceph -k $k --name $prefix_cc config-key set client/$match_bb/foo +ceph -k $k --name $prefix_cc config-key get client/$match_aa/foo +ceph -k $k --name $prefix_cc config-key get client/$match_bb/foo +expect_false ceph -k $k --name $prefix_cc config-key set other/prefix +expect_false ceph -k $k --name $prefix_cc config-key get mgr/test-foo +ceph -k $k --name $prefix_cc config-key ls >& /dev/null + +ceph -k $k --name $match_aa config-key get client/$match_aa/foo +expect_false ceph -k $k --name $match_aa config-key get client/$match_bb/foo +expect_false ceph -k $k --name $match_aa config-key set client/$match_aa/foo +ceph -k $k --name $match_bb config-key get client/$match_bb/foo +ceph -k $k --name $match_bb config-key set client/$match_bb/foo +expect_false ceph -k $k --name $match_bb config-key get client/$match_aa/foo +expect_false ceph -k $k --name $match_bb config-key set client/$match_aa/foo + +keys=(daemon-private/osd.123/test-foo + mgr/test-foo + device/test-foo + test/foo + client/$prefix_aa/foo + client/$prefix_bb/foo + client/$match_aa/foo + client/$match_bb/foo +) +# expect these all to fail accessing config-key +for c in $fail_aa $fail_bb $fail_cc \ + $fail_dd $fail_ee $fail_ff \ + $fail_gg; do + for m in get set; do + for key in ${keys[*]} client/$prefix_aa/foo client/$prefix_bb/foo; do + expect_false ceph -k $k --name $c config-key $m $key + done + done +done + +# fail writes but succeed on reads +expect_false ceph -k $k --name $fail_writes config-key set client/$match_aa/foo +expect_false ceph -k $k --name $fail_writes config-key set test/foo +ceph -k $k --name $fail_writes config-key ls +ceph -k $k --name $fail_writes config-key get client/$match_aa/foo +ceph -k $k --name $fail_writes config-key get daemon-private/osd.123/test-foo + +echo "OK" diff --git a/qa/workunits/mon/test_mon_config_key.py b/qa/workunits/mon/test_mon_config_key.py new file mode 100755 index 000000000..f81804c8a --- /dev/null +++ b/qa/workunits/mon/test_mon_config_key.py @@ -0,0 +1,463 @@ +#!/usr/bin/python3 +# +# test_mon_config_key - Test 'ceph config-key' interface +# +# Copyright (C) 2013 Inktank +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation. See file COPYING. 
+# +import argparse +import base64 +import errno +import json +import logging +import os +import random +import string +import subprocess +import sys +import time +from typing import List, Dict + +# +# Accepted Environment variables: +# CEPH_TEST_VERBOSE - be more verbose; '1' enables; '0' disables +# CEPH_TEST_DURATION - test duration in seconds +# CEPH_TEST_SEED - seed to be used during the test +# +# Accepted arguments and options (see --help): +# -v, --verbose - be more verbose +# -d, --duration SECS - test duration in seconds +# -s, --seed SEED - seed to be used during the test +# + + +LOG = logging.getLogger(os.path.basename(sys.argv[0].replace('.py', ''))) + +SIZES = [ + (0, 0), + (10, 0), + (25, 0), + (50, 0), + (100, 0), + (1000, 0), + (64 * 1024, 0), + (64 * 1024 + 1, -errno.EFBIG), + (128 * 1024, -errno.EFBIG) +] + +# tests will be randomly selected from the keys here, and the test +# suboperation will be randomly selected from the list in the values +# here. i.e. 'exists/existing' would test that a key the test put into +# the store earlier actually does still exist in the config store, +# and that's a separate test case from 'exists/enoent', which tests +# nonexistence of a key known to not be present. + +OPS = { + 'put': ['existing', 'new'], + 'del': ['existing', 'enoent'], + 'exists': ['existing', 'enoent'], + 'get': ['existing', 'enoent'], + 'list': ['existing', 'enoent'], + 'dump': ['existing', 'enoent'], +} + +CONFIG_PUT: List[str] = [] # list: keys +CONFIG_DEL: List[str] = [] # list: keys +CONFIG_EXISTING: Dict[str, int] = {} # map: key -> size + + +def run_cmd(cmd, expects=0): + full_cmd = ['ceph', 'config-key'] + cmd + + if expects < 0: + expects = -expects + + cmdlog = LOG.getChild('run_cmd') + cmdlog.debug('{fc}'.format(fc=' '.join(full_cmd))) + + proc = subprocess.run(full_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True) + if proc.returncode != expects: + cmdlog.error(f'cmd > {proc.args}') + cmdlog.error(f'expected return "{expects}" got "{proc.returncode}"') + cmdlog.error('stdout') + cmdlog.error(proc.stdout) + cmdlog.error('stderr') + cmdlog.error(proc.stderr) + + +def gen_data(size, rnd): + chars = string.ascii_letters + string.digits + return ''.join(rnd.choice(chars) for _ in range(size)) + + +def gen_key(rnd): + return gen_data(20, rnd) + + +def gen_tmp_file_path(rnd): + file_name = gen_data(20, rnd) + file_path = os.path.join('/tmp', 'ceph-test.' 
+ file_name) + return file_path + + +def destroy_tmp_file(fpath): + if os.path.exists(fpath) and os.path.isfile(fpath): + os.unlink(fpath) + + +def write_data_file(data, rnd): + file_path = gen_tmp_file_path(rnd) + data_file = open(file_path, 'a+') + data_file.truncate() + data_file.write(data) + data_file.close() + return file_path + + +# end write_data_file + +def choose_random_op(rnd): + op = rnd.choice( + list(OPS.keys()) + ) + sop = rnd.choice(OPS[op]) + return op, sop + + +def parse_args(args): + parser = argparse.ArgumentParser( + description="Test the monitor's 'config-key' API", + ) + parser.add_argument( + '-v', '--verbose', + action='store_true', + help='be more verbose', + ) + parser.add_argument( + '-s', '--seed', + metavar='SEED', + help='use SEED instead of generating it in run-time', + ) + parser.add_argument( + '-d', '--duration', + metavar='SECS', + help='run test for SECS seconds (default: 300)', + ) + parser.set_defaults( + seed=None, + duration=300, + verbose=False, + ) + return parser.parse_args(args) + + +def main(): + args = parse_args(sys.argv[1:]) + + verbose = args.verbose + if os.environ.get('CEPH_TEST_VERBOSE') is not None: + verbose = (os.environ.get('CEPH_TEST_VERBOSE') == '1') + + duration = int(os.environ.get('CEPH_TEST_DURATION', args.duration)) + seed = os.environ.get('CEPH_TEST_SEED', args.seed) + seed = int(time.time()) if seed is None else int(seed) + + rnd = random.Random() + rnd.seed(seed) + + loglevel = logging.INFO + if verbose: + loglevel = logging.DEBUG + + logging.basicConfig(level=loglevel) + + LOG.info('seed: {s}'.format(s=seed)) + + start = time.time() + + while (time.time() - start) < duration: + (op, sop) = choose_random_op(rnd) + + LOG.info('{o}({s})'.format(o=op, s=sop)) + op_log = LOG.getChild('{o}({s})'.format(o=op, s=sop)) + + if op == 'put': + via_file = (rnd.uniform(0, 100) < 50.0) + + expected = 0 + cmd = ['put'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + expected = 0 # the store just overrides the value if the key exists + # end if sop == 'existing' + elif sop == 'new': + for x in range(0, 10): + key = gen_key(rnd) + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + + assert key is not None, \ + 'key must be != None' + + cmd += [key] + + (size, error) = rnd.choice(SIZES) + if size > 25: + via_file = True + + data = gen_data(size, rnd) + + if error == 0: # only add if we expect the put to be successful + if sop == 'new': + CONFIG_PUT.append(key) + CONFIG_EXISTING[key] = size + expected = error + + if via_file: + data_file = write_data_file(data, rnd) + cmd += ['-i', data_file] + else: + cmd += [data] + + op_log.debug('size: {sz}, via: {v}'.format( + sz=size, + v='file: {f}'.format(f=data_file) if via_file == True else 'cli') + ) + run_cmd(cmd, expects=expected) + if via_file: + destroy_tmp_file(data_file) + continue + + elif op == 'del': + expected = 0 + cmd = ['del'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) 
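+ # the 'enoent' sub-op below mirrors 'existing': generate a random key
+ # (up to 10 attempts) that is known not to be in the store and run the
+ # op against it; for 'del' this still succeeds (expected 0), while
+ # 'exists' and 'get' expect -ENOENT.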
+ + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + expected = 0 # deleting a non-existent key succeeds + + assert key is not None, \ + 'key must be != None' + + cmd += [key] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + if sop == 'existing': + CONFIG_DEL.append(key) + CONFIG_PUT.remove(key) + del CONFIG_EXISTING[key] + continue + + elif op == 'exists': + expected = 0 + cmd = ['exists'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + expected = -errno.ENOENT + + assert key is not None, \ + 'key must be != None' + + cmd += [key] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + continue + + elif op == 'get': + expected = 0 + cmd = ['get'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + expected = -errno.ENOENT + + assert key is not None, \ + 'key must be != None' + + file_path = gen_tmp_file_path(rnd) + cmd += [key, '-o', file_path] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + if sop == 'existing': + try: + temp_file = open(file_path, 'r+') + except IOError as err: + if err.errno == errno.ENOENT: + assert CONFIG_EXISTING[key] == 0, \ + "error opening '{fp}': {e}".format(fp=file_path, e=err) + continue + else: + assert False, \ + 'some error occurred: {e}'.format(e=err) + cnt = 0 + while True: + read_data = temp_file.read() + if read_data == '': + break + cnt += len(read_data) + assert cnt == CONFIG_EXISTING[key], \ + "wrong size from store for key '{k}': {sz}, expected {es}".format( + k=key, sz=cnt, es=CONFIG_EXISTING[key]) + destroy_tmp_file(file_path) + continue + + elif op == 'list' or op == 'dump': + expected = 0 + cmd = [op] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to 
generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + + assert key is not None, \ + 'key must be != None' + + file_path = gen_tmp_file_path(rnd) + cmd += ['-o', file_path] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + try: + temp_file = open(file_path, 'r+') + except IOError as err: + if err.errno == errno.ENOENT: + assert CONFIG_EXISTING[key] == 0, \ + "error opening '{fp}': {e}".format(fp=file_path, e=err) + continue + else: + assert False, \ + 'some error occurred: {e}'.format(e=err) + cnt = 0 + try: + read_data = json.load(temp_file) + except ValueError: + temp_file.seek(0) + assert False, "{op} output was not valid JSON:\n{filedata}".format( + op=op, filedata=temp_file.readlines()) + + if sop == 'existing': + assert key in read_data, "key '{k}' not found in list/dump output".format(k=key) + if op == 'dump': + cnt = len(read_data[key]) + assert cnt == CONFIG_EXISTING[key], \ + "wrong size from list for key '{k}': {sz}, expected {es}".format( + k=key, sz=cnt, es=CONFIG_EXISTING[key]) + elif sop == 'enoent': + assert key not in read_data, "key '{k}' found in list/dump output".format(k=key) + destroy_tmp_file(file_path) + continue + else: + assert False, 'unknown op {o}'.format(o=op) + + # check if all keys in 'CONFIG_PUT' exist and + # if all keys on 'CONFIG_DEL' don't. + # but first however, remove all keys in CONFIG_PUT that might + # be in CONFIG_DEL as well. + config_put_set = set(CONFIG_PUT) + config_del_set = set(CONFIG_DEL).difference(config_put_set) + + LOG.info('perform sanity checks on store') + + for k in config_put_set: + LOG.getChild('check(puts)').debug('key: {k_}'.format(k_=k)) + run_cmd(['exists', k], expects=0) + for k in config_del_set: + LOG.getChild('check(dels)').debug('key: {k_}'.format(k_=k)) + run_cmd(['exists', k], expects=-errno.ENOENT) + + +if __name__ == "__main__": + main() diff --git a/qa/workunits/mon/test_mon_osdmap_prune.sh b/qa/workunits/mon/test_mon_osdmap_prune.sh new file mode 100755 index 000000000..9cdd72179 --- /dev/null +++ b/qa/workunits/mon/test_mon_osdmap_prune.sh @@ -0,0 +1,205 @@ +#!/bin/bash + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +set -x + +function wait_for_osdmap_manifest() { + + local what=${1:-"true"} + + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + for ((i=0; i < ${#delays[*]}; ++i)); do + has_manifest=$(ceph report | jq 'has("osdmap_manifest")') + if [[ "$has_manifest" == "$what" ]]; then + return 0 + fi + + sleep ${delays[$i]} + done + + echo "osdmap_manifest never outputted on report" + ceph report + return 1 +} + +function wait_for_trim() { + + local -i epoch=$1 + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + for ((i=0; i < ${#delays[*]}; ++i)); do + fc=$(ceph report | jq '.osdmap_first_committed') + if [[ $fc -eq $epoch ]]; then + return 0 + fi + sleep ${delays[$i]} + done + + echo "never trimmed up to epoch $epoch" + ceph report + return 1 +} + +function test_osdmap() { + + local epoch=$1 + local ret=0 + + tmp_map=$(mktemp) + ceph osd getmap $epoch -o $tmp_map || return 1 + if ! 
osdmaptool --print $tmp_map | grep "epoch $epoch" ; then + echo "ERROR: failed processing osdmap epoch $epoch" + ret=1 + fi + rm $tmp_map + return $ret +} + +function generate_osdmaps() { + + local -i num=$1 + + cmds=( set unset ) + for ((i=0; i < num; ++i)); do + ceph osd ${cmds[$((i%2))]} noup || return 1 + done + return 0 +} + +function test_mon_osdmap_prune() { + + create_pool foo 32 + wait_for_clean || return 1 + + ceph config set mon mon_debug_block_osdmap_trim true || return 1 + + generate_osdmaps 500 || return 1 + + report="$(ceph report)" + fc=$(jq '.osdmap_first_committed' <<< $report) + lc=$(jq '.osdmap_last_committed' <<< $report) + + [[ $((lc-fc)) -ge 500 ]] || return 1 + + wait_for_osdmap_manifest || return 1 + + manifest="$(ceph report | jq '.osdmap_manifest')" + + first_pinned=$(jq '.first_pinned' <<< $manifest) + last_pinned=$(jq '.last_pinned' <<< $manifest) + pinned_maps=( $(jq '.pinned_maps[]' <<< $manifest) ) + + # validate pinned maps list + [[ $first_pinned -eq ${pinned_maps[0]} ]] || return 1 + [[ $last_pinned -eq ${pinned_maps[-1]} ]] || return 1 + + # validate pinned maps range + [[ $first_pinned -lt $last_pinned ]] || return 1 + [[ $last_pinned -lt $lc ]] || return 1 + [[ $first_pinned -eq $fc ]] || return 1 + + # ensure all the maps are available, and work as expected + # this can take a while... + + for ((i=$first_pinned; i <= $last_pinned; ++i)); do + test_osdmap $i || return 1 + done + + # update pinned maps state: + # the monitor may have pruned & pinned additional maps since we last + # assessed state, given it's an iterative process. + # + manifest="$(ceph report | jq '.osdmap_manifest')" + first_pinned=$(jq '.first_pinned' <<< $manifest) + last_pinned=$(jq '.last_pinned' <<< $manifest) + pinned_maps=( $(jq '.pinned_maps[]' <<< $manifest) ) + + # test trimming maps + # + # we're going to perform the following tests: + # + # 1. force trim to a pinned map + # 2. force trim to a pinned map's previous epoch + # 3. trim all maps except the last 200 or so. + # + + # 1. force trim to a pinned map + # + [[ ${#pinned_maps[@]} -gt 10 ]] || return 1 + + trim_to=${pinned_maps[1]} + ceph config set mon mon_osd_force_trim_to $trim_to + ceph config set mon mon_min_osdmap_epochs 100 + ceph config set mon paxos_service_trim_min 1 + ceph config set mon mon_debug_block_osdmap_trim false + + # generate an epoch so we get to trim maps + ceph osd set noup + ceph osd unset noup + + wait_for_trim $trim_to || return 1 + + report="$(ceph report)" + fc=$(jq '.osdmap_first_committed' <<< $report) + [[ $fc -eq $trim_to ]] || return 1 + + old_first_pinned=$first_pinned + old_last_pinned=$last_pinned + first_pinned=$(jq '.osdmap_manifest.first_pinned' <<< $report) + last_pinned=$(jq '.osdmap_manifest.last_pinned' <<< $report) + [[ $first_pinned -eq $trim_to ]] || return 1 + [[ $first_pinned -gt $old_first_pinned ]] || return 1 + [[ $last_pinned -gt $old_first_pinned ]] || return 1 + + test_osdmap $trim_to || return 1 + test_osdmap $(( trim_to+1 )) || return 1 + + pinned_maps=( $(jq '.osdmap_manifest.pinned_maps[]' <<< $report) ) + + # 2. 
force trim to a pinned map's previous epoch + # + [[ ${#pinned_maps[@]} -gt 2 ]] || return 1 + trim_to=$(( ${pinned_maps[1]} - 1)) + ceph config set mon mon_osd_force_trim_to $trim_to + + # generate an epoch so we get to trim maps + ceph osd set noup + ceph osd unset noup + + wait_for_trim $trim_to || return 1 + + report="$(ceph report)" + fc=$(jq '.osdmap_first_committed' <<< $report) + [[ $fc -eq $trim_to ]] || return 1 + + old_first_pinned=$first_pinned + old_last_pinned=$last_pinned + first_pinned=$(jq '.osdmap_manifest.first_pinned' <<< $report) + last_pinned=$(jq '.osdmap_manifest.last_pinned' <<< $report) + pinned_maps=( $(jq '.osdmap_manifest.pinned_maps[]' <<< $report) ) + [[ $first_pinned -eq $trim_to ]] || return 1 + [[ ${pinned_maps[1]} -eq $(( trim_to+1)) ]] || return 1 + + test_osdmap $first_pinned || return 1 + test_osdmap $(( first_pinned + 1 )) || return 1 + + # 3. trim everything + # + ceph config set mon mon_osd_force_trim_to 0 + + # generate an epoch so we get to trim maps + ceph osd set noup + ceph osd unset noup + + wait_for_osdmap_manifest "false" || return 1 + + return 0 +} + +test_mon_osdmap_prune || exit 1 + +echo "OK" diff --git a/qa/workunits/mon/test_noautoscale_flag.sh b/qa/workunits/mon/test_noautoscale_flag.sh new file mode 100755 index 000000000..e1a45a4d8 --- /dev/null +++ b/qa/workunits/mon/test_noautoscale_flag.sh @@ -0,0 +1,104 @@ +#!/bin/bash -ex + +unset CEPH_CLI_TEST_DUP_COMMAND + +NUM_POOLS=$(ceph osd pool ls | wc -l) + +if [ "$NUM_POOLS" -gt 0 ]; then + echo "test requires no preexisting pools" + exit 1 +fi + +ceph osd pool set noautoscale + +ceph osd pool create pool_a + +echo 'pool_a autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off') + +NUM_POOLS=$[NUM_POOLS+1] + +sleep 2 + +# Count the number of Pools with AUTOSCALE `off` + +RESULT1=$(ceph osd pool autoscale-status | grep -oe 'off' | wc -l) + +# number of Pools with AUTOSCALE `off` should equal to $NUM_POOLS + +test "$RESULT1" -eq "$NUM_POOLS" + +ceph osd pool unset noautoscale + +echo $(ceph osd pool get noautoscale) + + +ceph osd pool create pool_b + +echo 'pool_a autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off') + +echo 'pool_b autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_b | grep -o -m 1 'on\|off') + + +NUM_POOLS=$[NUM_POOLS+1] + +sleep 2 + +# Count the number of Pools with AUTOSCALE `on` + +RESULT2=$(ceph osd pool autoscale-status | grep -oe 'on' | wc -l) + +# number of Pools with AUTOSCALE `on` should equal to 3 + +test "$RESULT2" -eq "$[NUM_POOLS-1]" + +ceph osd pool set noautoscale + +ceph osd pool create pool_c + +echo 'pool_a autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off') + +echo 'pool_b autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_b | grep -o -m 1 'on\|off') + +echo 'pool_c autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_c | grep -o -m 1 'on\|off') + + +NUM_POOLS=$[NUM_POOLS+1] + +sleep 2 + +# Count the number of Pools with AUTOSCALE `off` + +RESULT3=$(ceph osd pool autoscale-status | grep -oe 'off' | wc -l) + +# number of Pools with AUTOSCALE `off` should equal to 4 + +test "$RESULT3" -eq "$NUM_POOLS" + +# Now we test if we retain individual pool state of autoscale mode +# when we set and unset the noautoscale flag. 
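+# Expected outcome of the checks below: after a set/unset cycle of the global
+# noautoscale flag, pool_a should read back as 'on', pool_b as 'warn' and
+# pool_c as 'off' in 'ceph osd pool autoscale-status'.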
+ +ceph osd pool unset noautoscale + +ceph osd pool set pool_a pg_autoscale_mode on + +ceph osd pool set pool_b pg_autoscale_mode warn + +ceph osd pool set noautoscale + +ceph osd pool unset noautoscale + +RESULT4=$(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off\|warn') +RESULT5=$(ceph osd pool autoscale-status | grep pool_b | grep -o -m 1 'on\|off\|warn') +RESULT6=$(ceph osd pool autoscale-status | grep pool_c | grep -o -m 1 'on\|off\|warn') + +test "$RESULT4" == 'on' +test "$RESULT5" == 'warn' +test "$RESULT6" == 'off' + +ceph osd pool rm pool_a pool_a --yes-i-really-really-mean-it + +ceph osd pool rm pool_b pool_b --yes-i-really-really-mean-it + +ceph osd pool rm pool_c pool_c --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/objectstore/test_fuse.sh b/qa/workunits/objectstore/test_fuse.sh new file mode 100755 index 000000000..f1dcbd04f --- /dev/null +++ b/qa/workunits/objectstore/test_fuse.sh @@ -0,0 +1,129 @@ +#!/bin/sh -ex + +if ! id -u | grep -q '^0$'; then + echo "not root, re-running self via sudo" + sudo PATH=$PATH TYPE=$TYPE $0 + exit 0 +fi + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +COT=ceph-objectstore-tool +DATA=store_test_fuse_dir +[ -z "$TYPE" ] && TYPE=bluestore +MNT=store_test_fuse_mnt + +rm -rf $DATA +mkdir -p $DATA + +test -d $MNT && fusermount -u $MNT || true +rmdir $MNT || true +mkdir $MNT + +export CEPH_ARGS=--enable_experimental_unrecoverable_data_corrupting_features=bluestore + +$COT --no-mon-config --op mkfs --data-path $DATA --type $TYPE +$COT --no-mon-config --op fuse --data-path $DATA --mountpoint $MNT & + +while ! test -e $MNT/type ; do + echo waiting for $MNT/type to appear + sleep 1 +done + +umask 0 + +grep $TYPE $MNT/type + +# create collection +mkdir $MNT/meta +test -e $MNT/meta/bitwise_hash_start +test -d $MNT/meta/all +test -d $MNT/meta/by_bitwise_hash + +# create object +mkdir $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0# +test -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +test -d $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr +test -d $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap +test -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/bitwise_hash +test -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap_header + +# omap header +echo omap header > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap_header +grep -q omap $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap_header + +# omap +echo value a > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +echo value b > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb +ls $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap | grep -c key | grep -q 2 +grep 'value a' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +grep 'value b' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +test ! -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb +test ! -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb + +# attr +echo value a > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +echo value b > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb +ls $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr | grep -c key | grep -q 2 +grep 'value a' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +grep 'value b' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +test ! 
-e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb +test ! -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb + +# data +test ! -s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +echo asdfasdfasdf > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +test -s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +grep -q asdfasdfasdf $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +truncate --size 4 $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +stat --format=%s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data | grep -q ^4$ +expect_false grep -q asdfasdfasdf $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +test ! -s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data + + +# create pg collection +mkdir --mode 0003 $MNT/0.0_head +grep -q 00000000 $MNT/0.0_head/bitwise_hash_start +if [ "$TYPE" = "bluestore" ]; then + cat $MNT/0.0_head/bitwise_hash_bits + grep -q 3 $MNT/0.0_head/bitwise_hash_bits + grep -q 1fffffff $MNT/0.0_head/bitwise_hash_end +fi +test -d $MNT/0.0_head/all + +mkdir --mode 0003 $MNT/0.1_head +grep -q 80000000 $MNT/0.1_head/bitwise_hash_start +if [ "$TYPE" = "bluestore" ]; then + grep -q 3 $MNT/0.1_head/bitwise_hash_bits + grep -q 9fffffff $MNT/0.1_head/bitwise_hash_end +fi + +# create pg object +mkdir $MNT/0.0_head/all/#0:00000000::::head#/ +mkdir $MNT/0.0_head/all/#0:10000000:::foo:head#/ + +# verify pg bounds check +if [ "$TYPE" = "bluestore" ]; then + expect_false mkdir $MNT/0.0_head/all/#0:20000000:::bar:head#/ +fi + +# remove a collection +expect_false rmdir $MNT/0.0_head +rmdir $MNT/0.0_head/all/#0:10000000:::foo:head#/ +rmdir $MNT/0.0_head/all/#0:00000000::::head#/ +rmdir $MNT/0.0_head +rmdir $MNT/0.1_head + +fusermount -u $MNT +wait + +echo OK diff --git a/qa/workunits/osdc/stress_objectcacher.sh b/qa/workunits/osdc/stress_objectcacher.sh new file mode 100755 index 000000000..67baadc33 --- /dev/null +++ b/qa/workunits/osdc/stress_objectcacher.sh @@ -0,0 +1,28 @@ +#!/bin/sh -ex + +for i in $(seq 1 10) +do + for DELAY in 0 1000 + do + for OPS in 1000 10000 + do + for OBJECTS in 10 50 100 + do + for READS in 0.90 0.50 0.10 + do + for OP_SIZE in 4096 131072 1048576 + do + for MAX_DIRTY in 0 25165824 + do + ceph_test_objectcacher_stress --ops $OPS --percent-read $READS --delay-ns $DELAY --objects $OBJECTS --max-op-size $OP_SIZE --client-oc-max-dirty $MAX_DIRTY --stress-test > /dev/null 2>&1 + done + done + done + done + done + done +done + +ceph_test_objectcacher_stress --correctness-test > /dev/null 2>&1 + +echo OK diff --git a/qa/workunits/post-file.sh b/qa/workunits/post-file.sh new file mode 100755 index 000000000..120fb2634 --- /dev/null +++ b/qa/workunits/post-file.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -ex + +what="$1" +[ -z "$what" ] && what=/etc/udev/rules.d +sudo ceph-post-file -d ceph-test-workunit $what + +echo OK diff --git a/qa/workunits/rados/clone.sh b/qa/workunits/rados/clone.sh new file mode 100755 index 000000000..281e89f71 --- /dev/null +++ b/qa/workunits/rados/clone.sh @@ -0,0 +1,13 @@ +#!/bin/sh -x + +set -e + +rados -p data rm foo || true +rados -p data put foo.tmp /etc/passwd --object-locator foo +rados -p data clonedata foo.tmp foo --object-locator foo +rados -p data get foo /tmp/foo +cmp /tmp/foo /etc/passwd +rados -p data rm foo.tmp --object-locator foo +rados -p data rm foo + +echo OK
\ No newline at end of file diff --git a/qa/workunits/rados/load-gen-big.sh b/qa/workunits/rados/load-gen-big.sh new file mode 100755 index 000000000..6715658ec --- /dev/null +++ b/qa/workunits/rados/load-gen-big.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 10240 \ + --min-object-size 1048576 \ + --max-object-size 25600000 \ + --max-ops 1024 \ + --max-backlog 1024 \ + --read-percent 50 \ + --run-length 1200 diff --git a/qa/workunits/rados/load-gen-mix-small-long.sh b/qa/workunits/rados/load-gen-mix-small-long.sh new file mode 100755 index 000000000..593bad51d --- /dev/null +++ b/qa/workunits/rados/load-gen-mix-small-long.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 1024 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 50 \ + --run-length 1800 diff --git a/qa/workunits/rados/load-gen-mix-small.sh b/qa/workunits/rados/load-gen-mix-small.sh new file mode 100755 index 000000000..02db77bd0 --- /dev/null +++ b/qa/workunits/rados/load-gen-mix-small.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 1024 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 50 \ + --run-length 600 diff --git a/qa/workunits/rados/load-gen-mix.sh b/qa/workunits/rados/load-gen-mix.sh new file mode 100755 index 000000000..ad3b4be84 --- /dev/null +++ b/qa/workunits/rados/load-gen-mix.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 10240 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 50 \ + --run-length 600 diff --git a/qa/workunits/rados/load-gen-mostlyread.sh b/qa/workunits/rados/load-gen-mostlyread.sh new file mode 100755 index 000000000..236f82dd4 --- /dev/null +++ b/qa/workunits/rados/load-gen-mostlyread.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 51200 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 90 \ + --run-length 600 diff --git a/qa/workunits/rados/stress_watch.sh b/qa/workunits/rados/stress_watch.sh new file mode 100755 index 000000000..49f144bbc --- /dev/null +++ b/qa/workunits/rados/stress_watch.sh @@ -0,0 +1,7 @@ +#!/bin/sh -e + +ceph_test_stress_watch +ceph_multi_stress_watch rep reppool repobj +ceph_multi_stress_watch ec ecpool ecobj + +exit 0 diff --git a/qa/workunits/rados/test.sh b/qa/workunits/rados/test.sh new file mode 100755 index 000000000..daa25fe4d --- /dev/null +++ b/qa/workunits/rados/test.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +set -ex + +parallel=1 +[ "$1" = "--serial" ] && parallel=0 + +color="" +[ -t 1 ] && color="--gtest_color=yes" + +function cleanup() { + pkill -P $$ || true +} +trap cleanup EXIT ERR HUP INT QUIT + +declare -A pids + +for f in \ + api_aio api_aio_pp \ + api_io api_io_pp \ + api_asio api_list \ + api_lock api_lock_pp \ + api_misc api_misc_pp \ + api_tier_pp \ + api_pool \ + api_snapshots api_snapshots_pp \ + api_stat api_stat_pp \ + api_watch_notify api_watch_notify_pp \ + api_cmd api_cmd_pp \ + api_service api_service_pp \ + api_c_write_operations \ + api_c_read_operations \ + api_cls_remote_reads \ + list_parallel \ + open_pools_parallel \ + delete_pools_parallel +do + if [ $parallel -eq 1 ]; then + r=`printf '%25s' $f` + ff=`echo $f | awk '{print $1}'` + bash -o pipefail -exc "ceph_test_rados_$f $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" & + pid=$! 
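+ # pipefail makes the tee/sed pipeline report the test binary's exit
+ # status rather than sed's; the pid recorded here lets the wait loop at
+ # the bottom attribute failures to individual tests.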
+ echo "test $f on pid $pid" + pids[$f]=$pid + else + ceph_test_rados_$f + fi +done + +ret=0 +if [ $parallel -eq 1 ]; then +for t in "${!pids[@]}" +do + pid=${pids[$t]} + if ! wait $pid + then + echo "error in $t ($pid)" + ret=1 + fi +done +fi + +exit $ret diff --git a/qa/workunits/rados/test_alloc_hint.sh b/qa/workunits/rados/test_alloc_hint.sh new file mode 100755 index 000000000..535201ca3 --- /dev/null +++ b/qa/workunits/rados/test_alloc_hint.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash + +set -ex +shopt -s nullglob # fns glob expansion in expect_alloc_hint_eq() + +# +# Helpers +# + +function get_xml_val() { + local xml="$1" + local tag="$2" + + local regex=".*<${tag}>(.*)</${tag}>.*" + if [[ ! "${xml}" =~ ${regex} ]]; then + echo "'${xml}' xml doesn't match '${tag}' tag regex" >&2 + return 2 + fi + + echo "${BASH_REMATCH[1]}" +} + +function get_conf_val() { + set -e + + local entity="$1" + local option="$2" + + local val + val="$(sudo ceph daemon "${entity}" config get --format=xml "${option}")" + val="$(get_xml_val "${val}" "${option}")" + + echo "${val}" +} + +function setup_osd_data() { + for (( i = 0 ; i < "${NUM_OSDS}" ; i++ )); do + OSD_DATA[i]="$(get_conf_val "osd.$i" "osd_data")" + done +} + +function setup_pgid() { + local poolname="$1" + local objname="$2" + + local pgid + pgid="$(ceph osd map "${poolname}" "${objname}" --format=xml)" + pgid="$(get_xml_val "${pgid}" "pgid")" + + PGID="${pgid}" +} + +function expect_alloc_hint_eq() { + export CEPH_ARGS="--osd-objectstore=filestore" + local expected_extsize="$1" + + for (( i = 0 ; i < "${NUM_OSDS}" ; i++ )); do + # Make sure that stuff is flushed from the journal to the store + # by the time we get to it, as we prod the actual files and not + # the journal. + sudo ceph daemon "osd.${i}" "flush_journal" + + # e.g., .../25.6_head/foo__head_7FC1F406__19 + # .../26.bs1_head/bar__head_EFE6384B__1a_ffffffffffffffff_1 + local fns=$(sudo sh -c "ls ${OSD_DATA[i]}/current/${PGID}*_head/${OBJ}_*") + local count="${#fns[@]}" + if [ "${count}" -ne 1 ]; then + echo "bad fns count: ${count}" >&2 + return 2 + fi + + local extsize + extsize="$(sudo xfs_io -c extsize "${fns[0]}")" + local extsize_regex="^\[(.*)\] ${fns[0]}$" + if [[ ! 
"${extsize}" =~ ${extsize_regex} ]]; then + echo "extsize doesn't match extsize_regex: ${extsize}" >&2 + return 2 + fi + extsize="${BASH_REMATCH[1]}" + + if [ "${extsize}" -ne "${expected_extsize}" ]; then + echo "FAIL: alloc_hint: actual ${extsize}, expected ${expected_extsize}" >&2 + return 1 + fi + done +} + +# +# Global setup +# + +EC_K="2" +EC_M="1" +NUM_OSDS="$((EC_K + EC_M))" + +NUM_PG="12" +NUM_PGP="${NUM_PG}" + +LOW_CAP="$(get_conf_val "osd.0" "filestore_max_alloc_hint_size")" +HIGH_CAP="$((LOW_CAP * 10))" # 10M, assuming 1M default cap +SMALL_HINT="$((LOW_CAP / 4))" # 256K, assuming 1M default cap +BIG_HINT="$((LOW_CAP * 6))" # 6M, assuming 1M default cap + +setup_osd_data + +# +# ReplicatedBackend tests +# + +POOL="alloc_hint-rep" +ceph osd pool create "${POOL}" "${NUM_PG}" +ceph osd pool set "${POOL}" size "${NUM_OSDS}" --yes-i-really-mean-it +ceph osd pool application enable "${POOL}" rados + +OBJ="foo" +setup_pgid "${POOL}" "${OBJ}" +rados -p "${POOL}" create "${OBJ}" + +# Empty object, SMALL_HINT - expect SMALL_HINT +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "${SMALL_HINT}" + +# Try changing to BIG_HINT (1) - expect LOW_CAP (BIG_HINT > LOW_CAP) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${BIG_HINT}" "${BIG_HINT}" +expect_alloc_hint_eq "${LOW_CAP}" + +# Bump the cap to HIGH_CAP +ceph tell 'osd.*' injectargs "--filestore_max_alloc_hint_size ${HIGH_CAP}" + +# Try changing to BIG_HINT (2) - expect BIG_HINT (BIG_HINT < HIGH_CAP) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${BIG_HINT}" "${BIG_HINT}" +expect_alloc_hint_eq "${BIG_HINT}" + +ceph tell 'osd.*' injectargs "--filestore_max_alloc_hint_size ${LOW_CAP}" + +# Populate object with some data +rados -p "${POOL}" put "${OBJ}" /etc/passwd + +# Try changing back to SMALL_HINT - expect BIG_HINT (non-empty object) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "${BIG_HINT}" + +OBJ="bar" +setup_pgid "${POOL}" "${OBJ}" + +# Non-existent object, SMALL_HINT - expect SMALL_HINT (object creation) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "${SMALL_HINT}" + +ceph osd pool delete "${POOL}" "${POOL}" --yes-i-really-really-mean-it + +# +# ECBackend tests +# + +PROFILE="alloc_hint-ecprofile" +POOL="alloc_hint-ec" +ceph osd erasure-code-profile set "${PROFILE}" k=2 m=1 crush-failure-domain=osd +ceph osd erasure-code-profile get "${PROFILE}" # just so it's logged +ceph osd pool create "${POOL}" "${NUM_PG}" "${NUM_PGP}" erasure "${PROFILE}" +ceph osd pool application enable "${POOL}" rados + +OBJ="baz" +setup_pgid "${POOL}" "${OBJ}" +rados -p "${POOL}" create "${OBJ}" + +# Empty object, SMALL_HINT - expect scaled-down SMALL_HINT +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "$((SMALL_HINT / EC_K))" + +ceph osd pool delete "${POOL}" "${POOL}" --yes-i-really-really-mean-it + +# +# Global teardown +# + +echo "OK" diff --git a/qa/workunits/rados/test_cache_pool.sh b/qa/workunits/rados/test_cache_pool.sh new file mode 100755 index 000000000..f4187a98a --- /dev/null +++ b/qa/workunits/rados/test_cache_pool.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash + +set -ex + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +# create pools, set up tier relationship +ceph osd pool create base_pool 2 +ceph osd pool application enable base_pool rados +ceph osd pool create partial_wrong 2 +ceph osd pool create wrong_cache 2 +ceph 
osd tier add base_pool partial_wrong +ceph osd tier add base_pool wrong_cache + +# populate base_pool with some data +echo "foo" > foo.txt +echo "bar" > bar.txt +echo "baz" > baz.txt +rados -p base_pool put fooobj foo.txt +rados -p base_pool put barobj bar.txt +# fill in wrong_cache backwards so we can tell we read from it +rados -p wrong_cache put fooobj bar.txt +rados -p wrong_cache put barobj foo.txt +# partial_wrong gets barobj backwards so we can check promote and non-promote +rados -p partial_wrong put barobj foo.txt + +# get the objects back before setting a caching pool +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt foo.txt +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt bar.txt + +# set up redirect and make sure we get backwards results +ceph osd tier set-overlay base_pool wrong_cache +ceph osd tier cache-mode wrong_cache writeback +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt bar.txt +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt foo.txt + +# switch cache pools and make sure we're doing promote +ceph osd tier remove-overlay base_pool +ceph osd tier set-overlay base_pool partial_wrong +ceph osd tier cache-mode partial_wrong writeback +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt foo.txt # hurray, it promoted! +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt foo.txt # yep, we read partial_wrong's local object! + +# try a nonexistent object and make sure we get an error +expect_false rados -p base_pool get bazobj tmp.txt + +# drop the cache entirely and make sure contents are still the same +ceph osd tier remove-overlay base_pool +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt foo.txt +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt bar.txt + +# create an empty cache pool and make sure it has objects after reading +ceph osd pool create empty_cache 2 + +touch empty.txt +rados -p empty_cache ls > tmp.txt +diff -q tmp.txt empty.txt + +ceph osd tier add base_pool empty_cache +ceph osd tier set-overlay base_pool empty_cache +ceph osd tier cache-mode empty_cache writeback +rados -p base_pool get fooobj tmp.txt +rados -p base_pool get barobj tmp.txt +expect_false rados -p base_pool get bazobj tmp.txt + +rados -p empty_cache ls > tmp.txt +expect_false diff -q tmp.txt empty.txt + +# cleanup +ceph osd tier remove-overlay base_pool +ceph osd tier remove base_pool wrong_cache +ceph osd tier remove base_pool partial_wrong +ceph osd tier remove base_pool empty_cache +ceph osd pool delete base_pool base_pool --yes-i-really-really-mean-it +ceph osd pool delete empty_cache empty_cache --yes-i-really-really-mean-it +ceph osd pool delete wrong_cache wrong_cache --yes-i-really-really-mean-it +ceph osd pool delete partial_wrong partial_wrong --yes-i-really-really-mean-it + +## set of base, cache +ceph osd pool create base 8 +ceph osd pool application enable base rados +ceph osd pool create cache 8 + +ceph osd tier add base cache +ceph osd tier cache-mode cache writeback +ceph osd tier set-overlay base cache + +# cache-flush, cache-evict +rados -p base put foo /etc/passwd +expect_false rados -p base cache-evict foo +expect_false rados -p base cache-flush foo +expect_false rados -p cache cache-evict foo +rados -p cache cache-flush foo +rados -p cache cache-evict foo +rados -p cache ls - | wc -l | grep 0 + +# cache-try-flush, cache-evict +rados -p base put foo /etc/passwd +expect_false rados -p base cache-evict foo +expect_false rados -p base cache-flush foo +expect_false rados -p cache cache-evict foo +rados -p cache cache-try-flush foo 
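+# (informal note: cache-try-flush is the best-effort variant of cache-flush;
+#  it is expected to return an error rather than block when the object cannot
+#  be flushed immediately.)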
+rados -p cache cache-evict foo +rados -p cache ls - | wc -l | grep 0 + +# cache-flush-evict-all +rados -p base put bar /etc/passwd +rados -p cache ls - | wc -l | grep 1 +expect_false rados -p base cache-flush-evict-all +rados -p cache cache-flush-evict-all +rados -p cache ls - | wc -l | grep 0 + +# cache-try-flush-evict-all +rados -p base put bar /etc/passwd +rados -p cache ls - | wc -l | grep 1 +expect_false rados -p base cache-flush-evict-all +rados -p cache cache-try-flush-evict-all +rados -p cache ls - | wc -l | grep 0 + +# cache flush/evit when clone objects exist +rados -p base put testclone /etc/passwd +rados -p cache ls - | wc -l | grep 1 +ceph osd pool mksnap base snap +rados -p base put testclone /etc/hosts +rados -p cache cache-flush-evict-all +rados -p cache ls - | wc -l | grep 0 + +ceph osd tier cache-mode cache proxy --yes-i-really-mean-it +rados -p base -s snap get testclone testclone.txt +diff -q testclone.txt /etc/passwd +rados -p base get testclone testclone.txt +diff -q testclone.txt /etc/hosts + +# test --with-clones option +ceph osd tier cache-mode cache writeback +rados -p base put testclone2 /etc/passwd +rados -p cache ls - | wc -l | grep 1 +ceph osd pool mksnap base snap1 +rados -p base put testclone2 /etc/hosts +expect_false rados -p cache cache-flush testclone2 +rados -p cache cache-flush testclone2 --with-clones +expect_false rados -p cache cache-evict testclone2 +rados -p cache cache-evict testclone2 --with-clones +rados -p cache ls - | wc -l | grep 0 + +rados -p base -s snap1 get testclone2 testclone2.txt +diff -q testclone2.txt /etc/passwd +rados -p base get testclone2 testclone2.txt +diff -q testclone2.txt /etc/hosts + +# cleanup +ceph osd tier remove-overlay base +ceph osd tier remove base cache + +ceph osd pool delete cache cache --yes-i-really-really-mean-it +ceph osd pool delete base base --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/rados/test_crash.sh b/qa/workunits/rados/test_crash.sh new file mode 100755 index 000000000..26a4c9bdc --- /dev/null +++ b/qa/workunits/rados/test_crash.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +set -x + +# run on a single-node three-OSD cluster + +sudo killall -ABRT ceph-osd +sleep 5 + +# kill caused coredumps; find them and delete them, carefully, so as +# not to disturb other coredumps, or else teuthology will see them +# and assume test failure. sudos are because the core files are +# root/600 +for f in $(find $TESTDIR/archive/coredump -type f); do + gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f) + if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \ + ( \ + + expr match "$gdb_output" ".*terminated.*signal 6.*" || \ + expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \ + ) + then + sudo rm $f + fi +done + +# ceph-crash runs as the unprivileged "ceph" user, but when under test +# the ceph osd daemons are running as root, so their crash files aren't +# readable. let's chown them so they behave as they would in real life. +sudo chown -R ceph:ceph /var/lib/ceph/crash + +# let daemon find crashdumps on startup +sudo systemctl restart ceph-crash +sleep 30 + +# must be 3 crashdumps registered and moved to crash/posted +[ $(ceph crash ls | wc -l) = 4 ] || exit 1 # 4 here bc of the table header +[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1 + +# there should be a health warning +ceph health detail | grep RECENT_CRASH || exit 1 +ceph crash archive-all +sleep 30 +ceph health detail | grep -c RECENT_CRASH | grep 0 # should be gone! 
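+# (for reference, reports can also be archived one at a time instead of via
+#  archive-all; an illustrative sketch, assuming the first column printed by
+#  `ceph crash ls-new` is the crash id:
+#      ceph crash ls-new | awk 'NR>1 {print $1}' | xargs -r -n1 ceph crash archive
+# )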
diff --git a/qa/workunits/rados/test_crushdiff.sh b/qa/workunits/rados/test_crushdiff.sh new file mode 100755 index 000000000..833ecbd0b --- /dev/null +++ b/qa/workunits/rados/test_crushdiff.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +set -ex + +REP_POOL= +EC_POOL= +TEMPDIR= + +OSD_NUM=$(ceph osd ls | wc -l) +test ${OSD_NUM} -gt 0 + +setup() { + local pool + + TEMPDIR=`mktemp -d` + + pool=test-crushdiff-rep-$$ + ceph osd pool create ${pool} 32 + REP_POOL=${pool} + rados -p ${REP_POOL} bench 5 write --no-cleanup + + if [ ${OSD_NUM} -gt 3 ]; then + pool=test-crushdiff-ec-$$ + ceph osd pool create ${pool} 32 32 erasure + EC_POOL=${pool} + rados -p ${EC_POOL} bench 5 write --no-cleanup + fi +} + +cleanup() { + set +e + + test -n "${EC_POOL}" && + ceph osd pool delete "${EC_POOL}" "${EC_POOL}" \ + --yes-i-really-really-mean-it + EC_POOL= + + test -n "${REP_POOL}" && + ceph osd pool delete "${REP_POOL}" "${REP_POOL}" \ + --yes-i-really-really-mean-it + REP_POOL= + + test -n "${TEMPDIR}" && rm -Rf ${TEMPDIR} + TEMPDIR= +} + +trap "cleanup" INT TERM EXIT + +setup + +# test without crushmap modification + +crushdiff export ${TEMPDIR}/cm.txt --verbose +crushdiff compare ${TEMPDIR}/cm.txt --verbose +crushdiff import ${TEMPDIR}/cm.txt --verbose + +# test using a compiled crushmap + +crushdiff export ${TEMPDIR}/cm --compiled --verbose +crushdiff compare ${TEMPDIR}/cm --compiled --verbose +crushdiff import ${TEMPDIR}/cm --compiled --verbose + +# test using "offline" osdmap and pg-dump + +ceph osd getmap -o ${TEMPDIR}/osdmap +ceph pg dump --format json > ${TEMPDIR}/pg-dump + +crushdiff export ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap --verbose +crushdiff compare ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap \ + --pg-dump ${TEMPDIR}/pg-dump --verbose | tee ${TEMPDIR}/compare.txt + +# test the diff is zero when the crushmap is not modified + +grep '^0/[0-9]* (0\.00%) pgs affected' ${TEMPDIR}/compare.txt +grep '^0/[0-9]* (0\.00%) objects affected' ${TEMPDIR}/compare.txt +grep '^0/[0-9]* (0\.00%) pg shards to move' ${TEMPDIR}/compare.txt +grep '^0/[0-9]* (0\.00%) pg object shards to move' ${TEMPDIR}/compare.txt +grep '^0\.00/.* (0\.00%) bytes to move' ${TEMPDIR}/compare.txt +crushdiff import ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap --verbose + +if [ ${OSD_NUM} -gt 3 ]; then + + # test the diff is non-zero when the crushmap is modified + + cat ${TEMPDIR}/cm.txt >&2 + + weight=$(awk '/item osd\.0 weight ([0-9.]+)/ {print $4 * 3}' \ + ${TEMPDIR}/cm.txt) + test -n "${weight}" + sed -i -Ee 's/^(.*item osd\.0 weight )[0-9.]+/\1'${weight}'/' \ + ${TEMPDIR}/cm.txt + crushdiff compare ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap \ + --pg-dump ${TEMPDIR}/pg-dump --verbose | tee ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) pgs affected' ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) objects affected' ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) pg shards to move' ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) pg object shards to move' \ + ${TEMPDIR}/compare.txt + grep '^.*/.* (.*%) bytes to move' ${TEMPDIR}/compare.txt + crushdiff import ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap --verbose +fi + +echo OK diff --git a/qa/workunits/rados/test_dedup_tool.sh b/qa/workunits/rados/test_dedup_tool.sh new file mode 100755 index 000000000..18deb331b --- /dev/null +++ b/qa/workunits/rados/test_dedup_tool.sh @@ -0,0 +1,458 @@ +#!/usr/bin/env bash + +set -x + +die() { + echo "$@" + exit 1 +} + +do_run() { + if [ "$1" == "--tee" ]; then + shift + tee_out="$1" + shift + "$@" | tee 
$tee_out + else + "$@" + fi +} + +run_expect_succ() { + echo "RUN_EXPECT_SUCC: " "$@" + do_run "$@" + [ $? -ne 0 ] && die "expected success, but got failure! cmd: $@" +} + +run() { + echo "RUN: " $@ + do_run "$@" +} + +if [ -n "$CEPH_BIN" ] ; then + # CMake env + RADOS_TOOL="$CEPH_BIN/rados" + CEPH_TOOL="$CEPH_BIN/ceph" + DEDUP_TOOL="$CEPH_BIN/ceph-dedup-tool" +else + # executables should be installed by the QA env + RADOS_TOOL=$(which rados) + CEPH_TOOL=$(which ceph) + DEDUP_TOOL=$(which ceph-dedup-tool) +fi + +POOL=dedup_pool +OBJ=test_rados_obj + +[ -x "$RADOS_TOOL" ] || die "couldn't find $RADOS_TOOL binary to test" +[ -x "$CEPH_TOOL" ] || die "couldn't find $CEPH_TOOL binary to test" + +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 +sleep 5 + +function test_dedup_ratio_fixed() +{ + # case 1 + dd if=/dev/urandom of=dedup_object_1k bs=1K count=1 + for num in `seq 1 50` + do + dd if=dedup_object_1k of=dedup_object_100k bs=1K oflag=append conv=notrunc + done + for num in `seq 1 50` + do + dd if=/dev/zero of=dedup_object_100k bs=1K count=1 oflag=append conv=notrunc + done + + $RADOS_TOOL -p $POOL put $OBJ ./dedup_object_100k + RESULT=$($DEDUP_TOOL --op estimate --pool $POOL --chunk-size 1024 --chunk-algorithm fixed --fingerprint-algorithm sha1 | grep chunk_size_average | awk '{print$2}' | sed "s/\,//g") + # total size / the number of deduped object = 100K / 1 + if [ 51200 -ne $RESULT ]; + then + die "Estimate failed expecting 51200 result $RESULT" + fi + + # case 2 + dd if=/dev/zero of=dedup_object_10m bs=10M count=1 + + $RADOS_TOOL -p $POOL put $OBJ ./dedup_object_10m + RESULT=$($DEDUP_TOOL --op estimate --pool $POOL --chunk-size 4096 --chunk-algorithm fixed --fingerprint-algorithm sha1 | grep examined_bytes | awk '{print$2}') + # 10485760 + if [ 10485760 -ne $RESULT ]; + then + die "Estimate failed expecting 10485760 result $RESULT" + fi + + # case 3 max_thread + for num in `seq 0 20` + do + dd if=/dev/zero of=dedup_object_$num bs=4M count=1 + $RADOS_TOOL -p $POOL put dedup_object_$num ./dedup_object_$num + done + + RESULT=$($DEDUP_TOOL --op estimate --pool $POOL --chunk-size 4096 --chunk-algorithm fixed --fingerprint-algorithm sha1 --max-thread 4 | grep chunk_size_average | awk '{print$2}' | sed "s/\,//g") + + if [ 98566144 -ne $RESULT ]; + then + die "Estimate failed expecting 98566144 result $RESULT" + fi + + rm -rf ./dedup_object_1k ./dedup_object_100k ./dedup_object_10m + for num in `seq 0 20` + do + rm -rf ./dedup_object_$num + done + $RADOS_TOOL -p $POOL rm $OBJ + for num in `seq 0 20` + do + $RADOS_TOOL -p $POOL rm dedup_object_$num + done +} + +function test_dedup_chunk_scrub() +{ + + CHUNK_POOL=dedup_chunk_pool + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + echo "hi there" > foo + + echo "hi there" > bar + + echo "there" > foo-chunk + + echo "CHUNK" > bar-chunk + + $CEPH_TOOL osd pool set $POOL fingerprint_algorithm sha1 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_chunk_algorithm fastcdc --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_cdc_chunk_size 4096 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_tier $CHUNK_POOL --yes-i-really-mean-it + + $RADOS_TOOL -p $POOL put foo ./foo + $RADOS_TOOL -p $POOL put bar ./bar + + $RADOS_TOOL -p $CHUNK_POOL put bar-chunk ./bar-chunk + $RADOS_TOOL -p $CHUNK_POOL put foo-chunk ./foo-chunk + + $RADOS_TOOL -p $POOL set-chunk bar 0 8 --target-pool $CHUNK_POOL bar-chunk 0 --with-reference + + echo -n "There hi" > test_obj + # dirty + $RADOS_TOOL -p $POOL put foo ./test_obj + 
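+ # the set-chunk call below maps bytes 0..8 of "foo" onto chunk object
+ # "foo-chunk" (offset 0) in $CHUNK_POOL; --with-reference also records the
+ # back-reference on the chunk, which is what chunk-scrub later inspects.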
$RADOS_TOOL -p $POOL set-chunk foo 0 8 --target-pool $CHUNK_POOL foo-chunk 0 --with-reference + # flush + $RADOS_TOOL -p $POOL tier-flush foo + sleep 2 + + $RADOS_TOOL ls -p $CHUNK_POOL + CHUNK_OID=$(echo -n "There hi" | sha1sum | awk '{print $1}') + + POOL_ID=$($CEPH_TOOL osd pool ls detail | grep $POOL | awk '{print$2}') + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref bar --target-ref-pool-id $POOL_ID + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID) + + RESULT=$($DEDUP_TOOL --op chunk-scrub --chunk-pool $CHUNK_POOL | grep "Damaged object" | awk '{print$4}') + if [ $RESULT -ne "1" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk-scrub failed expecting damaged objects is not 1" + fi + + $DEDUP_TOOL --op chunk-put-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref bar --target-ref-pool-id $POOL_ID + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -n "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + rm -rf ./foo ./bar ./foo-chunk ./bar-chunk ./test_obj + $RADOS_TOOL -p $POOL rm foo + $RADOS_TOOL -p $POOL rm bar +} + +function test_dedup_chunk_repair() +{ + + CHUNK_POOL=dedup_chunk_pool + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + echo -n "hi there" > foo + + echo -n "hi there" > bar + + echo -n "there" > foo-chunk + + echo -n "CHUNK" > bar-chunk + + $CEPH_TOOL osd pool set $POOL fingerprint_algorithm sha1 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_chunk_algorithm fastcdc --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_cdc_chunk_size 4096 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_tier $CHUNK_POOL --yes-i-really-mean-it + + $RADOS_TOOL -p $POOL put foo ./foo + $RADOS_TOOL -p $POOL put bar ./bar + + $RADOS_TOOL -p $CHUNK_POOL put bar-chunk ./bar-chunk + $RADOS_TOOL -p $CHUNK_POOL put foo-chunk ./foo-chunk + + $RADOS_TOOL ls -p $CHUNK_POOL + CHUNK_OID=$(echo -n "hi there" | sha1sum | awk '{print $1}') + + POOL_ID=$($CEPH_TOOL osd pool ls detail | grep $POOL | awk '{print$2}') + $RADOS_TOOL -p $CHUNK_POOL put $CHUNK_OID ./foo + + # increase ref count by two, resuling in mismatch + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object bar-chunk --target-ref bar --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object bar-chunk --target-ref bar --target-ref-pool-id $POOL_ID + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID) + RESULT=$($DEDUP_TOOL --op chunk-scrub --chunk-pool $CHUNK_POOL | grep "Damaged object" | 
awk '{print$4}') + if [ $RESULT -ne "2" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk-scrub failed expecting damaged objects is not 1" + fi + + $DEDUP_TOOL --op chunk-repair --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-repair --chunk-pool $CHUNK_POOL --object bar-chunk --target-ref bar --target-ref-pool-id $POOL_ID + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep foo | wc -l) + if [ 0 -ne "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object bar-chunk | grep bar | wc -l) + if [ 0 -ne "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + rm -rf ./foo ./bar ./foo-chunk ./bar-chunk ./test_obj + $RADOS_TOOL -p $POOL rm foo + $RADOS_TOOL -p $POOL rm bar +} + +function test_dedup_object() +{ + + CHUNK_POOL=dedup_chunk_pool + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + echo "There hiHI" > foo + + $CEPH_TOOL osd pool set $POOL dedup_tier $CHUNK_POOL --yes-i-really-mean-it + $RADOS_TOOL -p $POOL put foo ./foo + + sleep 2 + + rados ls -p $CHUNK_POOL + + RESULT=$($DEDUP_TOOL --pool $POOL --op chunk-dedup --object foo --chunk-pool $CHUNK_POOL --source-off 0 --source-length 10 --fingerprint-algorithm sha1 ) + + POOL_ID=$($CEPH_TOOL osd pool ls detail | grep $POOL | awk '{print$2}') + CHUNK_OID=$(echo -n "There hiHI" | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep foo) + + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $RADOS_TOOL -p $CHUNK_POOL get $CHUNK_OID ./chunk + VERIFY=$(cat ./chunk | sha1sum | awk '{print $1}') + if [ "$CHUNK_OID" != "$VERIFY" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Comparing failed expecting chunk mismatch" + fi + + echo -n "There hihiHI" > bar + + $RADOS_TOOL -p $POOL put bar ./bar + RESULT=$($DEDUP_TOOL --pool $POOL --op object-dedup --object bar --chunk-pool $CHUNK_POOL --fingerprint-algorithm sha1 --dedup-cdc-chunk-size 4096) + + CHUNK_OID=$(echo -n "There hihiHI" | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $RADOS_TOOL -p $CHUNK_POOL get $CHUNK_OID ./chunk + VERIFY=$(cat ./chunk | sha1sum | awk '{print $1}') + if [ "$CHUNK_OID" != "$VERIFY" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL 
--yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Comparing failed expecting chunk mismatch" + fi + + echo -n "THERE HIHIHI" > bar + $RADOS_TOOL -p $POOL put bar ./bar + $RADOS_TOOL -p $POOL mksnap mysnap + + echo -n "There HIHIHI" > bar + $RADOS_TOOL -p $POOL put bar ./bar + + RESULT=$($DEDUP_TOOL --pool $POOL --op object-dedup --object bar --chunk-pool $CHUNK_POOL --fingerprint-algorithm sha1 --dedup-cdc-chunk-size 4096 --snap) + + CHUNK_OID=$(echo -n "THERE HIHIHI" | sha1sum | awk '{print $1}') + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + CHUNK_OID=$(echo -n "There HIHIHI" | sha1sum | awk '{print $1}') + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + # rerun tier-flush + + RESULT=$($DEDUP_TOOL --pool $POOL --op object-dedup --object bar --chunk-pool $CHUNK_POOL --fingerprint-algorithm sha1 --dedup-cdc-chunk-size 4096) + + CHUNK_OID=$(echo -n "There HIHIHI" | sha1sum | awk '{print $1}') + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + rm -rf ./foo ./bar ./chunk + $RADOS_TOOL -p $POOL rm foo + $RADOS_TOOL -p $POOL rm bar +} + +function test_sample_dedup() +{ + CHUNK_POOL=dedup_chunk_pool + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + sleep 2 + + run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_tier "$CHUNK_POOL" + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_chunk_algorithm fastcdc + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_cdc_chunk_size 8192 + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" fingerprint_algorithm sha1 + + # 8 Dedupable objects + CONTENT_1="There hiHI" + echo $CONTENT_1 > foo + for num in `seq 1 8` + do + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # 1 Unique object + CONTENT_3="There hiHI3" + echo $CONTENT_3 > foo3 + $RADOS_TOOL -p $POOL put foo3_1 ./foo3 + + sleep 2 + + # Execute dedup crawler + RESULT=$($DEDUP_TOOL --pool $POOL --chunk-pool $CHUNK_POOL --op sample-dedup --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --chunk-dedup-threshold 3 --sampling-ratio 50) + + CHUNK_OID_1=$(echo $CONTENT_1 | sha1sum | awk '{print $1}') + CHUNK_OID_3=$(echo $CONTENT_3 | sha1sum | awk '{print $1}') + + # Find chunk object has references of 8 dedupable meta objects + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_1) + DEDUP_COUNT=0 + for num in `seq 1 8` + do + 
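+ # count how many of the 8 identical objects ended up referencing the
+ # shared chunk; sample-dedup above ran with --sampling-ratio 50, so only
+ # a subset is expected to be deduped (hence the >= 2 check below).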
GREP_RESULT=$(echo $RESULT | grep foo_$num) + if [ -n "$GREP_RESULT" ]; then + DEDUP_COUNT=$(($DEDUP_COUNT + 1)) + fi + done + if [ $DEDUP_COUNT -lt 2 ]; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk object has no reference of first meta object" + fi + + # 7 Duplicated objects but less than chunk dedup threshold + CONTENT_2="There hiHI2" + echo $CONTENT_2 > foo2 + for num in `seq 1 7` + do + $RADOS_TOOL -p $POOL put foo2_$num ./foo2 + done + CHUNK_OID_2=$(echo $CONTENT_2 | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --pool $POOL --chunk-pool $CHUNK_POOL --op sample-dedup --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --sampling-ratio 100 --chunk-dedup-threshold 2) + + # Objects duplicates less than chunk dedup threshold should be deduplicated because of they satisfies object-dedup-threshold + # The only object, which is crawled at the very first, should not be deduplicated because it was not duplicated at initial time + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_2) + DEDUP_COUNT=0 + for num in `seq 1 7` + do + GREP_RESULT=$(echo $RESULT | grep foo2_$num) + if [ -n "$GREP_RESULT" ]; then + DEDUP_COUNT=$(($DEDUP_COUNT + 1)) + fi + done + if [ $DEDUP_COUNT -ne 6 ]; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk object has no reference of first meta object" + fi + + # Unique object should not be deduplicated + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_3) + GREP_RESULT=$($RESULT | grep $CHUNK_OID_3) + if [ -n "$GREP_RESULT" ]; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk object has no reference of second meta object" + fi + + rm -rf ./foo ./foo2 ./foo3 + for num in `seq 1 8` + do + $RADOS_TOOL -p $POOL rm foo_$num + done + for num in `seq 1 2` + do + $RADOS_TOOL -p $POOL rm foo2_$num + done + $RADOS_TOOL -p $POOL rm foo3_1 + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it +} + +test_dedup_ratio_fixed +test_dedup_chunk_scrub +test_dedup_chunk_repair +test_dedup_object +test_sample_dedup + +$CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + +echo "SUCCESS!" 
+exit 0 diff --git a/qa/workunits/rados/test_envlibrados_for_rocksdb.sh b/qa/workunits/rados/test_envlibrados_for_rocksdb.sh new file mode 100755 index 000000000..371452f40 --- /dev/null +++ b/qa/workunits/rados/test_envlibrados_for_rocksdb.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +set -ex + +############################################ +# Helper functions +############################################ +source $(dirname $0)/../ceph-helpers-root.sh + +############################################ +# Install required tools +############################################ +echo "Install required tools" + +CURRENT_PATH=`pwd` + +############################################ +# Compile&Start RocksDB +############################################ +# install prerequisites +# for rocksdb +case $(distro_id) in + ubuntu|debian|devuan|softiron) + install git g++ libsnappy-dev zlib1g-dev libbz2-dev libradospp-dev cmake + ;; + centos|fedora|rhel) + case $(distro_id) in + rhel) + # RHEL needs CRB repo for snappy-devel + sudo subscription-manager repos --enable "codeready-builder-for-rhel-8-x86_64-rpms" + ;; + esac + install git gcc-c++.x86_64 snappy-devel zlib zlib-devel bzip2 bzip2-devel libradospp-devel.x86_64 cmake libarchive-3.3.3 + ;; + opensuse*|suse|sles) + install git gcc-c++ snappy-devel zlib-devel libbz2-devel libradospp-devel + ;; + *) + echo "$(distro_id) is unknown, $@ will have to be installed manually." + ;; +esac + +# # gflags +# sudo yum install gflags-devel +# +# wget https://github.com/schuhschuh/gflags/archive/master.zip +# unzip master.zip +# cd gflags-master +# mkdir build && cd build +# export CXXFLAGS="-fPIC" && cmake .. && make VERBOSE=1 +# make && make install + +# # snappy-devel + + +echo "Compile rocksdb" +if [ -e rocksdb ]; then + rm -fr rocksdb +fi + +pushd $(dirname /home/ubuntu/cephtest/clone.client.0/qa/workunits/rados/bash.sh)/../../../ +git submodule update --init src/rocksdb +popd +git clone $(dirname /home/ubuntu/cephtest/clone.client.0/qa/workunits/rados/bash.sh)/../../../src/rocksdb rocksdb + +# compile code +cd rocksdb +if type cmake3 > /dev/null 2>&1 ; then + CMAKE=cmake3 +else + CMAKE=cmake +fi + +[ -z "$BUILD_DIR" ] && BUILD_DIR=build +mkdir ${BUILD_DIR} && cd ${BUILD_DIR} && ${CMAKE} -DCMAKE_BUILD_TYPE=Debug -DWITH_TESTS=ON -DWITH_LIBRADOS=ON -DWITH_SNAPPY=ON -DWITH_GFLAGS=OFF -DFAIL_ON_WARNINGS=OFF .. 
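+# (the cmake flags above are taken as-is from this test: WITH_LIBRADOS=ON is
+#  what enables the EnvLibrados backend exercised by rocksdb_env_librados_test,
+#  while GFLAGS and FAIL_ON_WARNINGS are turned off to keep the build simple.)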
+make rocksdb_env_librados_test -j8 + +echo "Copy ceph.conf" +# prepare ceph.conf +mkdir -p ../ceph/src/ +if [ -f "/etc/ceph/ceph.conf" ]; then + cp /etc/ceph/ceph.conf ../ceph/src/ +elif [ -f "/etc/ceph/ceph/ceph.conf" ]; then + cp /etc/ceph/ceph/ceph.conf ../ceph/src/ +else + echo "/etc/ceph/ceph/ceph.conf doesn't exist" +fi + +echo "Run EnvLibrados test" +# run test +if [ -f "../ceph/src/ceph.conf" ] + then + cp env_librados_test ~/cephtest/archive + ./env_librados_test +else + echo "../ceph/src/ceph.conf doesn't exist" +fi +cd ${CURRENT_PATH} diff --git a/qa/workunits/rados/test_hang.sh b/qa/workunits/rados/test_hang.sh new file mode 100755 index 000000000..724e0bb82 --- /dev/null +++ b/qa/workunits/rados/test_hang.sh @@ -0,0 +1,8 @@ +#!/bin/sh -ex + +# Hang forever for manual testing using the thrasher +while(true) +do + sleep 300 +done +exit 0 diff --git a/qa/workunits/rados/test_health_warnings.sh b/qa/workunits/rados/test_health_warnings.sh new file mode 100755 index 000000000..d393e5c68 --- /dev/null +++ b/qa/workunits/rados/test_health_warnings.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +set -uex + +# number of osds = 10 +crushtool -o crushmap --build --num_osds 10 host straw 2 rack straw 2 row straw 2 root straw 0 +ceph osd setcrushmap -i crushmap +ceph osd tree +ceph tell osd.* injectargs --osd_max_markdown_count 1024 --osd_max_markdown_period 1 +ceph osd set noout + +wait_for_healthy() { + while ceph health | grep down + do + sleep 1 + done +} + +test_mark_two_osds_same_host_down() { + ceph osd set noup + ceph osd down osd.0 osd.1 + ceph health detail + ceph health | grep "1 host" + ceph health | grep "2 osds" + ceph health detail | grep "osd.0" + ceph health detail | grep "osd.1" + ceph osd unset noup + wait_for_healthy +} + +test_mark_two_osds_same_rack_down() { + ceph osd set noup + ceph osd down osd.8 osd.9 + ceph health detail + ceph health | grep "1 host" + ceph health | grep "1 rack" + ceph health | grep "1 row" + ceph health | grep "2 osds" + ceph health detail | grep "osd.8" + ceph health detail | grep "osd.9" + ceph osd unset noup + wait_for_healthy +} + +test_mark_all_but_last_osds_down() { + ceph osd set noup + ceph osd down $(ceph osd ls | sed \$d) + ceph health detail + ceph health | grep "1 row" + ceph health | grep "2 racks" + ceph health | grep "4 hosts" + ceph health | grep "9 osds" + ceph osd unset noup + wait_for_healthy +} + +test_mark_two_osds_same_host_down_with_classes() { + ceph osd set noup + ceph osd crush set-device-class ssd osd.0 osd.2 osd.4 osd.6 osd.8 + ceph osd crush set-device-class hdd osd.1 osd.3 osd.5 osd.7 osd.9 + ceph osd down osd.0 osd.1 + ceph health detail + ceph health | grep "1 host" + ceph health | grep "2 osds" + ceph health detail | grep "osd.0" + ceph health detail | grep "osd.1" + ceph osd unset noup + wait_for_healthy +} + +test_mark_two_osds_same_host_down +test_mark_two_osds_same_rack_down +test_mark_all_but_last_osds_down +test_mark_two_osds_same_host_down_with_classes + +exit 0 diff --git a/qa/workunits/rados/test_large_omap_detection.py b/qa/workunits/rados/test_large_omap_detection.py new file mode 100755 index 000000000..b5c81a8d8 --- /dev/null +++ b/qa/workunits/rados/test_large_omap_detection.py @@ -0,0 +1,134 @@ +#!/usr/bin/python3 +# -*- mode:python -*- +# vim: ts=4 sw=4 smarttab expandtab +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software 
Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +import json +import rados +import shlex +import subprocess +import time + +def cleanup(cluster): + cluster.delete_pool('large-omap-test-pool') + cluster.shutdown() + +def init(): + # For local testing + #cluster = rados.Rados(conffile='./ceph.conf') + cluster = rados.Rados(conffile='/etc/ceph/ceph.conf') + cluster.connect() + print("\nCluster ID: " + cluster.get_fsid()) + cluster.create_pool('large-omap-test-pool') + ioctx = cluster.open_ioctx('large-omap-test-pool') + ioctx.write_full('large-omap-test-object1', b"Lorem ipsum") + op = ioctx.create_write_op() + + keys = [] + values = [] + for x in range(20001): + keys.append(str(x)) + values.append(b"X") + + ioctx.set_omap(op, tuple(keys), tuple(values)) + ioctx.operate_write_op(op, 'large-omap-test-object1', 0) + ioctx.release_write_op(op) + + ioctx.write_full('large-omap-test-object2', b"Lorem ipsum dolor") + op = ioctx.create_write_op() + + buffer = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do " + "eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut " + "enim ad minim veniam, quis nostrud exercitation ullamco laboris " + "nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in " + "reprehenderit in voluptate velit esse cillum dolore eu fugiat " + "nulla pariatur. Excepteur sint occaecat cupidatat non proident, " + "sunt in culpa qui officia deserunt mollit anim id est laborum.") + + keys = [] + values = [] + for x in range(20000): + keys.append(str(x)) + values.append(buffer.encode()) + + ioctx.set_omap(op, tuple(keys), tuple(values)) + ioctx.operate_write_op(op, 'large-omap-test-object2', 0) + ioctx.release_write_op(op) + ioctx.close() + return cluster + +def get_deep_scrub_timestamp(pgid): + cmd = ['ceph', 'pg', 'dump', '--format=json-pretty'] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) + out = proc.communicate()[0] + try: + pgstats = json.loads(out)['pg_map']['pg_stats'] + except KeyError: + pgstats = json.loads(out)['pg_stats'] + for stat in pgstats: + if stat['pgid'] == pgid: + return stat['last_deep_scrub_stamp'] + +def wait_for_scrub(): + osds = set(); + pgs = dict(); + cmd = ['ceph', 'osd', 'map', 'large-omap-test-pool', + 'large-omap-test-object1', '--format=json-pretty'] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) + out = proc.communicate()[0] + osds.add(json.loads(out)['acting_primary']) + pgs[json.loads(out)['pgid']] = get_deep_scrub_timestamp(json.loads(out)['pgid']) + cmd = ['ceph', 'osd', 'map', 'large-omap-test-pool', + 'large-omap-test-object2', '--format=json-pretty'] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) + out = proc.communicate()[0] + osds.add(json.loads(out)['acting_primary']) + pgs[json.loads(out)['pgid']] = get_deep_scrub_timestamp(json.loads(out)['pgid']) + + for pg in pgs: + command = "ceph pg deep-scrub " + str(pg) + subprocess.check_call(shlex.split(command)) + + for pg in pgs: + RETRIES = 0 + while RETRIES < 60 and pgs[pg] == get_deep_scrub_timestamp(pg): + time.sleep(10) + RETRIES += 1 + +def check_health_output(): + RETRIES = 0 + result = 0 + while RETRIES < 6 and result != 2: + result = 0 + RETRIES += 1 + output = subprocess.check_output(["ceph", "health", "detail"]) + for line in output.splitlines(): + 
result += int(line.find(b'2 large omap objects') != -1) + time.sleep(10) + + if result != 2: + print("Error, got invalid output:") + print(output) + raise Exception + +def main(): + cluster = init() + wait_for_scrub() + check_health_output() + + cleanup(cluster) + +if __name__ == '__main__': + main() diff --git a/qa/workunits/rados/test_libcephsqlite.sh b/qa/workunits/rados/test_libcephsqlite.sh new file mode 100755 index 000000000..1810a3f3f --- /dev/null +++ b/qa/workunits/rados/test_libcephsqlite.sh @@ -0,0 +1,136 @@ +#!/bin/bash -ex + +# The main point of these tests beyond ceph_test_libcephsqlite is to: +# +# - Ensure you can load the Ceph VFS via the dynamic load extension mechanism +# in SQLite. +# - Check the behavior of a dead application, that it does not hold locks +# indefinitely. + +pool="$1" +ns="$(basename $0)" + +function sqlite { + background="$1" + if [ "$background" = b ]; then + shift + fi + a=$(cat) + printf "%s" "$a" >&2 + # We're doing job control gymnastics here to make sure that sqlite3 is the + # main process (i.e. the process group leader) in the background, not a bash + # function or job pipeline. + sqlite3 -cmd '.output /dev/null' -cmd '.load libcephsqlite.so' -cmd 'pragma journal_mode = PERSIST' -cmd ".open file:///$pool:$ns/baz.db?vfs=ceph" -cmd '.output stdout' <<<"$a" & + if [ "$background" != b ]; then + wait + fi +} + +function striper { + rados --pool=$pool --namespace="$ns" --striper "$@" +} + +function repeat { + n=$1 + shift + for ((i = 0; i < "$n"; ++i)); do + echo "$*" + done +} + +striper rm baz.db || true + +time sqlite <<EOF +create table if not exists foo (a INT); +insert into foo (a) values (RANDOM()); +drop table foo; +EOF + +striper stat baz.db +striper rm baz.db + +time sqlite <<EOF +CREATE TABLE IF NOT EXISTS rand(text BLOB NOT NULL); +$(repeat 10 'INSERT INTO rand (text) VALUES (RANDOMBLOB(4096));') +SELECT LENGTH(text) FROM rand; +DROP TABLE rand; +EOF + +time sqlite <<EOF +BEGIN TRANSACTION; +CREATE TABLE IF NOT EXISTS rand(text BLOB NOT NULL); +$(repeat 100 'INSERT INTO rand (text) VALUES (RANDOMBLOB(4096));') +COMMIT; +SELECT LENGTH(text) FROM rand; +DROP TABLE rand; +EOF + +# Connection death drops the lock: + +striper rm baz.db +date +sqlite b <<EOF +CREATE TABLE foo (a BLOB); +INSERT INTO foo VALUES ("start"); +WITH RECURSIVE c(x) AS + ( + VALUES(1) + UNION ALL + SELECT x+1 + FROM c + ) +INSERT INTO foo (a) + SELECT RANDOMBLOB(1<<20) + FROM c + LIMIT (1<<20); +EOF + +# Let it chew on that INSERT for a while so it writes data, it will not finish as it's trying to write 2^40 bytes... +sleep 10 +echo done + +jobs -l +kill -KILL -- $(jobs -p) +date +wait +date + +n=$(sqlite <<<"SELECT COUNT(*) FROM foo;") +[ "$n" -eq 1 ] + +# Connection "hang" loses the lock and cannot reacquire it: + +striper rm baz.db +date +sqlite b <<EOF +CREATE TABLE foo (a BLOB); +INSERT INTO foo VALUES ("start"); +WITH RECURSIVE c(x) AS + ( + VALUES(1) + UNION ALL + SELECT x+1 + FROM c + ) +INSERT INTO foo (a) + SELECT RANDOMBLOB(1<<20) + FROM c + LIMIT (1<<20); +EOF + +# Same thing, let it chew on the INSERT for a while... 
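+# (here the writer is paused with SIGSTOP for longer than the lock renewal
+#  timeout (30s, per the comment below), so once resumed it should discover
+#  it lost the RADOS lock and exit with an error instead of finishing.)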
+sleep 20 +jobs -l +kill -STOP -- $(jobs -p) +# cephsqlite_lock_renewal_timeout is 30s +sleep 45 +date +kill -CONT -- $(jobs -p) +sleep 10 +date +# it should exit with an error as it lost the lock +wait +date + +n=$(sqlite <<<"SELECT COUNT(*) FROM foo;") +[ "$n" -eq 1 ] diff --git a/qa/workunits/rados/test_librados_build.sh b/qa/workunits/rados/test_librados_build.sh new file mode 100755 index 000000000..14e332515 --- /dev/null +++ b/qa/workunits/rados/test_librados_build.sh @@ -0,0 +1,87 @@ +#!/bin/bash -ex +# +# Compile and run a librados application outside of the ceph build system, so +# that we can be sure librados.h[pp] is still usable and hasn't accidentally +# started depending on internal headers. +# +# The script assumes all dependencies - e.g. curl, make, gcc, librados headers, +# libradosstriper headers, boost headers, etc. - are already installed. +# + +source $(dirname $0)/../ceph-helpers-root.sh + +trap cleanup EXIT + +SOURCES="hello_radosstriper.cc +hello_world_c.c +hello_world.cc +Makefile +" +BINARIES_TO_RUN="hello_world_c +hello_world_cpp +" +BINARIES="${BINARIES_TO_RUN}hello_radosstriper_cpp +" +# parse output like "octopus (dev)" +case $(librados-config --release | grep -Po ' \(\K[^\)]+') in + dev) + BRANCH=main;; + rc|stable) + BRANCH=$(librados-config --release | cut -d' ' -f1);; + *) + echo "unknown release '$(librados-config --release)'" >&2 + return 1;; +esac +DL_PREFIX="http://git.ceph.com/?p=ceph.git;a=blob_plain;hb=${BRANCH};f=examples/librados/" +#DL_PREFIX="https://raw.githubusercontent.com/ceph/ceph/master/examples/librados/" +DESTDIR=$(pwd) + +function cleanup () { + for f in $BINARIES$SOURCES ; do + rm -f "${DESTDIR}/$f" + done +} + +function get_sources () { + for s in $SOURCES ; do + curl --progress-bar --output $s -L ${DL_PREFIX}$s + done +} + +function check_sources () { + for s in $SOURCES ; do + test -f $s + done +} + +function check_binaries () { + for b in $BINARIES ; do + file $b + test -f $b + done +} + +function run_binaries () { + for b in $BINARIES_TO_RUN ; do + ./$b -c /etc/ceph/ceph.conf + done +} + +pushd $DESTDIR +case $(distro_id) in + centos|fedora|rhel|opensuse*|suse|sles) + install gcc-c++ make libradospp-devel librados-devel;; + ubuntu) + install gcc-11 g++-11 make libradospp-dev librados-dev + export CXX_FLAGS="-std=c++20";; + debian|devuan|softiron) + install g++ make libradospp-dev librados-dev;; + *) + echo "$(distro_id) is unknown, $@ will have to be installed manually." 
+esac +get_sources +check_sources +make all-system +check_binaries +run_binaries +popd diff --git a/qa/workunits/rados/test_pool_access.sh b/qa/workunits/rados/test_pool_access.sh new file mode 100755 index 000000000..4082870bc --- /dev/null +++ b/qa/workunits/rados/test_pool_access.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +set -ex + +KEYRING=$(mktemp) +trap cleanup EXIT ERR HUP INT QUIT + +cleanup() { + (ceph auth del client.mon_read || true) >/dev/null 2>&1 + (ceph auth del client.mon_write || true) >/dev/null 2>&1 + + rm -f $KEYRING +} + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +create_pool_op() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +cluster.create_pool("${POOL}") +EOF +} + +delete_pool_op() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +cluster.delete_pool("${POOL}") +EOF +} + +create_pool_snap_op() { + ID=$1 + POOL=$2 + SNAP=$3 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +ioctx = cluster.open_ioctx("${POOL}") + +ioctx.create_snap("${SNAP}") +EOF +} + +remove_pool_snap_op() { + ID=$1 + POOL=$2 + SNAP=$3 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +ioctx = cluster.open_ioctx("${POOL}") + +ioctx.remove_snap("${SNAP}") +EOF +} + +test_pool_op() +{ + ceph auth get-or-create client.mon_read mon 'allow r' >> $KEYRING + ceph auth get-or-create client.mon_write mon 'allow *' >> $KEYRING + + expect_false create_pool_op mon_read pool1 + create_pool_op mon_write pool1 + + expect_false create_pool_snap_op mon_read pool1 snap1 + create_pool_snap_op mon_write pool1 snap1 + + expect_false remove_pool_snap_op mon_read pool1 snap1 + remove_pool_snap_op mon_write pool1 snap1 + + expect_false delete_pool_op mon_read pool1 + delete_pool_op mon_write pool1 +} + +key=`ceph auth get-or-create-key client.poolaccess1 mon 'allow r' osd 'allow *'` +rados --id poolaccess1 --key $key -p rbd ls + +key=`ceph auth get-or-create-key client.poolaccess2 mon 'allow r' osd 'allow * pool=nopool'` +expect_false rados --id poolaccess2 --key $key -p rbd ls + +key=`ceph auth get-or-create-key client.poolaccess3 mon 'allow r' osd 'allow rw pool=nopool'` +expect_false rados --id poolaccess3 --key $key -p rbd ls + +test_pool_op + +echo OK diff --git a/qa/workunits/rados/test_pool_quota.sh b/qa/workunits/rados/test_pool_quota.sh new file mode 100755 index 000000000..0eacefc64 --- /dev/null +++ b/qa/workunits/rados/test_pool_quota.sh @@ -0,0 +1,68 @@ +#!/bin/sh -ex + +p=`uuidgen` + +# objects +ceph osd pool create $p 12 +ceph osd pool set-quota $p max_objects 10 +ceph osd pool application enable $p rados + +for f in `seq 1 10` ; do + rados -p $p put obj$f /etc/passwd +done + +sleep 30 + +rados -p $p put onemore /etc/passwd & +pid=$! + +ceph osd pool set-quota $p max_objects 100 +wait $pid +[ $? -ne 0 ] && exit 1 || true + +rados -p $p put twomore /etc/passwd + +# bytes +ceph osd pool set-quota $p max_bytes 100 +sleep 30 + +rados -p $p put two /etc/passwd & +pid=$! + +ceph osd pool set-quota $p max_bytes 0 +ceph osd pool set-quota $p max_objects 0 +wait $pid +[ $? 
-ne 0 ] && exit 1 || true + +rados -p $p put three /etc/passwd + + +#one pool being full does not block a different pool + +pp=`uuidgen` + +ceph osd pool create $pp 12 +ceph osd pool application enable $pp rados + +# set objects quota +ceph osd pool set-quota $pp max_objects 10 +sleep 30 + +for f in `seq 1 10` ; do + rados -p $pp put obj$f /etc/passwd +done + +sleep 30 + +rados -p $p put threemore /etc/passwd + +ceph osd pool set-quota $p max_bytes 0 +ceph osd pool set-quota $p max_objects 0 + +sleep 30 +# done +ceph osd pool delete $p $p --yes-i-really-really-mean-it +ceph osd pool delete $pp $pp --yes-i-really-really-mean-it + +echo OK + diff --git a/qa/workunits/rados/test_python.sh b/qa/workunits/rados/test_python.sh new file mode 100755 index 000000000..cf4597a41 --- /dev/null +++ b/qa/workunits/rados/test_python.sh @@ -0,0 +1,5 @@ +#!/bin/sh -ex + +ceph osd pool create rbd +${PYTHON:-python3} -m pytest -v $(dirname $0)/../../../src/test/pybind/test_rados.py "$@" +exit 0 diff --git a/qa/workunits/rados/test_rados_timeouts.sh b/qa/workunits/rados/test_rados_timeouts.sh new file mode 100755 index 000000000..327c7ab32 --- /dev/null +++ b/qa/workunits/rados/test_rados_timeouts.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -x + +delay_mon() { + MSGTYPE=$1 + shift + $@ --rados-mon-op-timeout 1 --ms-inject-delay-type mon --ms-inject-delay-max 10000000 --ms-inject-delay-probability 1 --ms-inject-delay-msg-type $MSGTYPE + if [ $? -eq 0 ]; then + exit 1 + fi +} + +delay_osd() { + MSGTYPE=$1 + shift + $@ --rados-osd-op-timeout 1 --ms-inject-delay-type osd --ms-inject-delay-max 10000000 --ms-inject-delay-probability 1 --ms-inject-delay-msg-type $MSGTYPE + if [ $? -eq 0 ]; then + exit 2 + fi +} + +# pool ops +delay_mon omap rados lspools +delay_mon poolopreply ceph osd pool create test 8 +delay_mon poolopreply rados mksnap -p test snap +delay_mon poolopreply ceph osd pool rm test test --yes-i-really-really-mean-it + +# other mon ops +delay_mon getpoolstats rados df +delay_mon mon_command ceph df +delay_mon omap ceph osd dump +delay_mon omap ceph -s + +# osd ops +delay_osd osd_op_reply rados -p data put ls /bin/ls +delay_osd osd_op_reply rados -p data get ls - >/dev/null +delay_osd osd_op_reply rados -p data ls +delay_osd command_reply ceph tell osd.0 bench 1 1 + +# rbd commands, using more kinds of osd ops +rbd create -s 1 test +delay_osd osd_op_reply rbd watch test +delay_osd osd_op_reply rbd info test +delay_osd osd_op_reply rbd snap create test@snap +delay_osd osd_op_reply rbd import /bin/ls ls +rbd rm test + +echo OK diff --git a/qa/workunits/rados/test_rados_tool.sh b/qa/workunits/rados/test_rados_tool.sh new file mode 100755 index 000000000..9d025eee8 --- /dev/null +++ b/qa/workunits/rados/test_rados_tool.sh @@ -0,0 +1,924 @@ +#!/usr/bin/env bash + +set -x + +die() { + echo "$@" + exit 1 +} + +usage() { + cat <<EOF +test_rados_tool.sh: tests rados_tool +-c: RADOS configuration file to use [optional] +-k: keep temp files +-h: this help message +-p: set temporary pool to use [optional] +EOF +} + +do_run() { + if [ "$1" == "--tee" ]; then + shift + tee_out="$1" + shift + "$@" | tee $tee_out + else + "$@" + fi +} + +run_expect_fail() { + echo "RUN_EXPECT_FAIL: " "$@" + do_run "$@" + [ $? -eq 0 ] && die "expected failure, but got success! cmd: $@" +} + +run_expect_succ() { + echo "RUN_EXPECT_SUCC: " "$@" + do_run "$@" + [ $? -ne 0 ] && die "expected success, but got failure! cmd: $@" +} + +run_expect_nosignal() { + echo "RUN_EXPECT_NOSIGNAL: " "$@" + do_run "$@" + [ $? 
-ge 128 ] && die "expected success or fail, but got signal! cmd: $@" +} + +run() { + echo "RUN: " $@ + do_run "$@" +} + +if [ -n "$CEPH_BIN" ] ; then + # CMake env + RADOS_TOOL="$CEPH_BIN/rados" + CEPH_TOOL="$CEPH_BIN/ceph" +else + # executables should be installed by the QA env + RADOS_TOOL=$(which rados) + CEPH_TOOL=$(which ceph) +fi + +KEEP_TEMP_FILES=0 +POOL=trs_pool +POOL_CP_TARGET=trs_pool.2 +POOL_EC=trs_pool_ec + +[ -x "$RADOS_TOOL" ] || die "couldn't find $RADOS_TOOL binary to test" +[ -x "$CEPH_TOOL" ] || die "couldn't find $CEPH_TOOL binary to test" + +while getopts "c:hkp:" flag; do + case $flag in + c) RADOS_TOOL="$RADOS_TOOL -c $OPTARG";; + k) KEEP_TEMP_FILES=1;; + h) usage; exit 0;; + p) POOL=$OPTARG;; + *) echo; usage; exit 1;; + esac +done + +TDIR=`mktemp -d -t test_rados_tool.XXXXXXXXXX` || die "mktemp failed" +[ $KEEP_TEMP_FILES -eq 0 ] && trap "rm -rf ${TDIR}; exit" INT TERM EXIT + +# ensure rados doesn't segfault without --pool +run_expect_nosignal "$RADOS_TOOL" --snap "asdf" ls +run_expect_nosignal "$RADOS_TOOL" --snapid "0" ls +run_expect_nosignal "$RADOS_TOOL" --object-locator "asdf" ls +run_expect_nosignal "$RADOS_TOOL" --namespace "asdf" ls + +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 +run_expect_succ "$CEPH_TOOL" osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL_EC" 100 100 erasure myprofile + + +# expb happens to be the empty export for legacy reasons +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/expb" + +# expa has objects foo, foo2 and bar +run_expect_succ "$RADOS_TOOL" -p "$POOL" put foo /etc/fstab +run_expect_succ "$RADOS_TOOL" -p "$POOL" put foo2 /etc/fstab +run_expect_succ "$RADOS_TOOL" -p "$POOL" put bar /etc/fstab +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/expa" + +# expc has foo and foo2 with some attributes and omaps set +run_expect_succ "$RADOS_TOOL" -p "$POOL" rm bar +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo "rados.toothbrush" "toothbrush" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo "rados.toothpaste" "crest" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapval foo "rados.floss" "myfloss" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo2 "rados.toothbrush" "green" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapheader foo2 "foo2.header" +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/expc" + +# make sure that --create works +run "$CEPH_TOOL" osd pool rm "$POOL" "$POOL" --yes-i-really-really-mean-it +run_expect_succ "$RADOS_TOOL" -p "$POOL" --create import "$TDIR/expa" + +# make sure that lack of --create fails +run_expect_succ "$CEPH_TOOL" osd pool rm "$POOL" "$POOL" --yes-i-really-really-mean-it +run_expect_fail "$RADOS_TOOL" -p "$POOL" import "$TDIR/expa" + +run_expect_succ "$RADOS_TOOL" -p "$POOL" --create import "$TDIR/expa" + +# inaccessible import src should fail +run_expect_fail "$RADOS_TOOL" -p "$POOL" import "$TDIR/dir_nonexistent" + +# export an empty pool to test purge +run_expect_succ "$RADOS_TOOL" purge "$POOL" --yes-i-really-really-mean-it +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/empty" +cmp -s "$TDIR/expb" "$TDIR/empty" \ + || die "failed to export the same stuff we imported!" 
+rm -f "$TDIR/empty" + +# import some stuff with extended attributes on it +run_expect_succ "$RADOS_TOOL" -p "$POOL" import "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ ${VAL} = "toothbrush" ] || die "Invalid attribute after import" + +# the second time, the xattrs should match, so there should be nothing to do. +run_expect_succ "$RADOS_TOOL" -p "$POOL" import "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ "${VAL}" = "toothbrush" ] || die "Invalid attribute after second import" + +# Now try with --no-overwrite option after changing an attribute +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo "rados.toothbrush" "dentist" +run_expect_succ "$RADOS_TOOL" -p "$POOL" import --no-overwrite "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ "${VAL}" = "dentist" ] || die "Invalid attribute after second import" + +# now force it to copy everything +run_expect_succ "$RADOS_TOOL" -p "$POOL" import "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ "${VAL}" = "toothbrush" ] || die "Invalid attribute after second import" + +# test copy pool +run "$CEPH_TOOL" osd pool rm "$POOL" "$POOL" --yes-i-really-really-mean-it +run "$CEPH_TOOL" osd pool rm "$POOL_CP_TARGET" "$POOL_CP_TARGET" --yes-i-really-really-mean-it +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL_CP_TARGET" 8 + +# create src files +mkdir -p "$TDIR/dir_cp_src" +for i in `seq 1 5`; do + fname="$TDIR/dir_cp_src/f.$i" + objname="f.$i" + dd if=/dev/urandom of="$fname" bs=$((1024*1024)) count=$i + run_expect_succ "$RADOS_TOOL" -p "$POOL" put $objname "$fname" + +# a few random attrs + for j in `seq 1 4`; do + rand_str=`dd if=/dev/urandom bs=4 count=1 | hexdump -x` + run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr $objname attr.$j "$rand_str" + run_expect_succ --tee "$fname.attr.$j" "$RADOS_TOOL" -p "$POOL" getxattr $objname attr.$j + done + + rand_str=`dd if=/dev/urandom bs=4 count=1 | hexdump -x` + run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapheader $objname "$rand_str" + run_expect_succ --tee "$fname.omap.header" "$RADOS_TOOL" -p "$POOL" getomapheader $objname + +# a few random omap keys + for j in `seq 1 4`; do + rand_str=`dd if=/dev/urandom bs=4 count=1 | hexdump -x` + run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapval $objname key.$j "$rand_str" + done + run_expect_succ --tee "$fname.omap.vals" "$RADOS_TOOL" -p "$POOL" listomapvals $objname +done + +run_expect_succ "$RADOS_TOOL" cppool "$POOL" "$POOL_CP_TARGET" + +mkdir -p "$TDIR/dir_cp_dst" +for i in `seq 1 5`; do + fname="$TDIR/dir_cp_dst/f.$i" + objname="f.$i" + run_expect_succ "$RADOS_TOOL" -p "$POOL_CP_TARGET" get $objname "$fname" + +# a few random attrs + for j in `seq 1 4`; do + run_expect_succ --tee "$fname.attr.$j" "$RADOS_TOOL" -p "$POOL_CP_TARGET" getxattr $objname attr.$j + done + + run_expect_succ --tee "$fname.omap.header" "$RADOS_TOOL" -p "$POOL_CP_TARGET" getomapheader $objname + run_expect_succ --tee "$fname.omap.vals" "$RADOS_TOOL" -p "$POOL_CP_TARGET" listomapvals $objname +done + +diff -q -r "$TDIR/dir_cp_src" "$TDIR/dir_cp_dst" \ + || die "copy pool validation failed!" 
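+# The cppool check above relies on the --tee mode of do_run(): each getxattr,
+# getomapheader and listomapvals result is captured to a file next to the data
+# file while the command runs, so the source and destination trees can be
+# compared with a single 'diff -q -r' instead of re-reading every object.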
+ +for opt in \ + block-size \ + concurrent-ios \ + min-object-size \ + max-object-size \ + min-op-len \ + max-op-len \ + max-ops \ + max-backlog \ + target-throughput \ + read-percent \ + num-objects \ + run-length \ + ; do + run_expect_succ "$RADOS_TOOL" --$opt 4 df + run_expect_fail "$RADOS_TOOL" --$opt 4k df +done + +run_expect_succ "$RADOS_TOOL" lock list f.1 --lock-duration 4 --pool "$POOL" +echo # previous command doesn't output an end of line: issue #9735 +run_expect_fail "$RADOS_TOOL" lock list f.1 --lock-duration 4k --pool "$POOL" + +run_expect_succ "$RADOS_TOOL" mksnap snap1 --pool "$POOL" +snapid=$("$RADOS_TOOL" lssnap --pool "$POOL" | grep snap1 | cut -f1) +[ $? -ne 0 ] && die "expected success, but got failure! cmd: \"$RADOS_TOOL\" lssnap --pool \"$POOL\" | grep snap1 | cut -f1" +run_expect_succ "$RADOS_TOOL" ls --pool "$POOL" --snapid="$snapid" +run_expect_fail "$RADOS_TOOL" ls --pool "$POOL" --snapid="$snapid"k + +run_expect_succ "$RADOS_TOOL" truncate f.1 0 --pool "$POOL" +run_expect_fail "$RADOS_TOOL" truncate f.1 0k --pool "$POOL" + +run "$CEPH_TOOL" osd pool rm delete_me_mkpool_test delete_me_mkpool_test --yes-i-really-really-mean-it +run_expect_succ "$CEPH_TOOL" osd pool create delete_me_mkpool_test 1 + +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 write +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 1k write +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 write --format json --output "$TDIR/bench.json" +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 1 write --output "$TDIR/bench.json" +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --format json --no-cleanup +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 rand --format json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 rand -f json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 seq --format json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 seq -f json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-omap +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-object +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr --write-object +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr --write-omap +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-omap --write-object +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr --write-omap --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-omap +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr --write-omap +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-omap --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr --write-omap --write-object + +for i in $("$RADOS_TOOL" --pool "$POOL" ls | grep "benchmark_data"); do + "$RADOS_TOOL" --pool "$POOL" truncate $i 0 +done + +run_expect_nosignal "$RADOS_TOOL" --pool "$POOL" bench 1 rand +run_expect_nosignal "$RADOS_TOOL" --pool "$POOL" bench 1 seq + +set -e + +OBJ=test_rados_obj + +expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +cleanup() { + $RADOS_TOOL -p $POOL rm $OBJ > /dev/null 2>&1 || true + $RADOS_TOOL -p $POOL_EC rm $OBJ > 
/dev/null 2>&1 || true +} + +test_omap() { + cleanup + for i in $(seq 1 1 10) + do + if [ $(($i % 2)) -eq 0 ]; then + $RADOS_TOOL -p $POOL setomapval $OBJ $i $i + else + echo -n "$i" | $RADOS_TOOL -p $POOL setomapval $OBJ $i + fi + $RADOS_TOOL -p $POOL getomapval $OBJ $i | grep -q "|$i|\$" + done + $RADOS_TOOL -p $POOL listomapvals $OBJ | grep -c value | grep 10 + for i in $(seq 1 1 5) + do + $RADOS_TOOL -p $POOL rmomapkey $OBJ $i + done + $RADOS_TOOL -p $POOL listomapvals $OBJ | grep -c value | grep 5 + $RADOS_TOOL -p $POOL clearomap $OBJ + $RADOS_TOOL -p $POOL listomapvals $OBJ | wc -l | grep 0 + cleanup + + for i in $(seq 1 1 10) + do + dd if=/dev/urandom bs=128 count=1 > $TDIR/omap_key + if [ $(($i % 2)) -eq 0 ]; then + $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key setomapval $OBJ $i + else + echo -n "$i" | $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key setomapval $OBJ + fi + $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key getomapval $OBJ | grep -q "|$i|\$" + $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key rmomapkey $OBJ + $RADOS_TOOL -p $POOL listomapvals $OBJ | grep -c value | grep 0 + done + cleanup +} + +test_xattr() { + cleanup + $RADOS_TOOL -p $POOL put $OBJ /etc/passwd + V1=`mktemp fooattrXXXXXXX` + V2=`mktemp fooattrXXXXXXX` + echo -n fooval > $V1 + expect_false $RADOS_TOOL -p $POOL setxattr $OBJ 2>/dev/null + expect_false $RADOS_TOOL -p $POOL setxattr $OBJ foo fooval extraarg 2>/dev/null + $RADOS_TOOL -p $POOL setxattr $OBJ foo fooval + $RADOS_TOOL -p $POOL getxattr $OBJ foo > $V2 + cmp $V1 $V2 + cat $V1 | $RADOS_TOOL -p $POOL setxattr $OBJ bar + $RADOS_TOOL -p $POOL getxattr $OBJ bar > $V2 + cmp $V1 $V2 + $RADOS_TOOL -p $POOL listxattr $OBJ > $V1 + grep -q foo $V1 + grep -q bar $V1 + [ `cat $V1 | wc -l` -eq 2 ] + rm $V1 $V2 + cleanup +} +test_rmobj() { + p=`uuidgen` + $CEPH_TOOL osd pool create $p 1 + $CEPH_TOOL osd pool set-quota $p max_objects 1 + V1=`mktemp fooattrXXXXXXX` + $RADOS_TOOL put $OBJ $V1 -p $p + while ! 
$CEPH_TOOL osd dump | grep 'full_quota max_objects' + do + sleep 2 + done + $RADOS_TOOL -p $p rm $OBJ --force-full + $CEPH_TOOL osd pool rm $p $p --yes-i-really-really-mean-it + rm $V1 +} + +test_ls() { + echo "Testing rados ls command" + p=`uuidgen` + $CEPH_TOOL osd pool create $p 1 + NS=10 + OBJS=20 + # Include default namespace (0) in the total + TOTAL=$(expr $OBJS \* $(expr $NS + 1)) + + for nsnum in `seq 0 $NS` + do + for onum in `seq 1 $OBJS` + do + if [ "$nsnum" = "0" ]; + then + "$RADOS_TOOL" -p $p put obj${onum} /etc/fstab 2> /dev/null + else + "$RADOS_TOOL" -p $p -N "NS${nsnum}" put obj${onum} /etc/fstab 2> /dev/null + fi + done + done + CHECK=$("$RADOS_TOOL" -p $p ls 2> /dev/null | wc -l) + if [ "$OBJS" -ne "$CHECK" ]; + then + die "Created $OBJS objects in default namespace but saw $CHECK" + fi + TESTNS=NS${NS} + CHECK=$("$RADOS_TOOL" -p $p -N $TESTNS ls 2> /dev/null | wc -l) + if [ "$OBJS" -ne "$CHECK" ]; + then + die "Created $OBJS objects in $TESTNS namespace but saw $CHECK" + fi + CHECK=$("$RADOS_TOOL" -p $p --all ls 2> /dev/null | wc -l) + if [ "$TOTAL" -ne "$CHECK" ]; + then + die "Created $TOTAL objects but saw $CHECK" + fi + + $CEPH_TOOL osd pool rm $p $p --yes-i-really-really-mean-it +} + +test_cleanup() { + echo "Testing rados cleanup command" + p=`uuidgen` + $CEPH_TOOL osd pool create $p 1 + NS=5 + OBJS=4 + # Include default namespace (0) in the total + TOTAL=$(expr $OBJS \* $(expr $NS + 1)) + + for nsnum in `seq 0 $NS` + do + for onum in `seq 1 $OBJS` + do + if [ "$nsnum" = "0" ]; + then + "$RADOS_TOOL" -p $p put obj${onum} /etc/fstab 2> /dev/null + else + "$RADOS_TOOL" -p $p -N "NS${nsnum}" put obj${onum} /etc/fstab 2> /dev/null + fi + done + done + + $RADOS_TOOL -p $p --all ls > $TDIR/before.ls.out 2> /dev/null + + $RADOS_TOOL -p $p bench 3 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS1 bench 3 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS2 bench 3 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS3 bench 3 write --no-cleanup 2> /dev/null + # Leave dangling objects without a benchmark_last_metadata in NS4 + expect_false timeout 3 $RADOS_TOOL -p $p -N NS4 bench 30 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS5 bench 3 write --no-cleanup 2> /dev/null + + $RADOS_TOOL -p $p -N NS3 cleanup 2> /dev/null + #echo "Check NS3 after specific cleanup" + CHECK=$($RADOS_TOOL -p $p -N NS3 ls | wc -l) + if [ "$OBJS" -ne "$CHECK" ] ; + then + die "Expected $OBJS objects in NS3 but saw $CHECK" + fi + + #echo "Try to cleanup all" + $RADOS_TOOL -p $p --all cleanup + #echo "Check all namespaces" + $RADOS_TOOL -p $p --all ls > $TDIR/after.ls.out 2> /dev/null + CHECK=$(cat $TDIR/after.ls.out | wc -l) + if [ "$TOTAL" -ne "$CHECK" ]; + then + die "Expected $TOTAL objects but saw $CHECK" + fi + if ! 
diff $TDIR/before.ls.out $TDIR/after.ls.out + then + die "Different objects found after cleanup" + fi + + set +e + run_expect_fail $RADOS_TOOL -p $p cleanup --prefix illegal_prefix + run_expect_succ $RADOS_TOOL -p $p cleanup --prefix benchmark_data_otherhost + set -e + + $CEPH_TOOL osd pool rm $p $p --yes-i-really-really-mean-it +} + +function test_append() +{ + cleanup + + # create object + touch ./rados_append_null + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_null + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_0_out + cmp ./rados_append_null ./rados_append_0_out + + # append 4k, total size 4k + dd if=/dev/zero of=./rados_append_4k bs=4k count=1 + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_4k_out + cmp ./rados_append_4k ./rados_append_4k_out + + # append 4k, total size 8k + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_4k_out + read_size=`ls -l ./rados_append_4k_out | awk -F ' ' '{print $5}'` + if [ 8192 -ne $read_size ]; + then + die "Append failed expecting 8192 read $read_size" + fi + + # append 10M, total size 10493952 + dd if=/dev/zero of=./rados_append_10m bs=10M count=1 + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_10m + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_10m_out + read_size=`ls -l ./rados_append_10m_out | awk -F ' ' '{print $5}'` + if [ 10493952 -ne $read_size ]; + then + die "Append failed expecting 10493952 read $read_size" + fi + + # cleanup + cleanup + + # create object + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_null + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_0_out + cmp rados_append_null rados_append_0_out + + # append 4k, total size 4k + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_4k_out + cmp rados_append_4k rados_append_4k_out + + # append 4k, total size 8k + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_4k_out + read_size=`ls -l ./rados_append_4k_out | awk -F ' ' '{print $5}'` + if [ 8192 -ne $read_size ]; + then + die "Append failed expecting 8192 read $read_size" + fi + + # append 10M, total size 10493952 + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_10m + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_10m_out + read_size=`ls -l ./rados_append_10m_out | awk -F ' ' '{print $5}'` + if [ 10493952 -ne $read_size ]; + then + die "Append failed expecting 10493952 read $read_size" + fi + + cleanup + rm -rf ./rados_append_null ./rados_append_0_out + rm -rf ./rados_append_4k ./rados_append_4k_out ./rados_append_10m ./rados_append_10m_out +} + +function test_put() +{ + # rados put test: + cleanup + + # create file in local fs + dd if=/dev/urandom of=rados_object_10k bs=1K count=10 + + # test put command + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_10k + $RADOS_TOOL -p $POOL get $OBJ ./rados_object_10k_out + cmp ./rados_object_10k ./rados_object_10k_out + cleanup + + # test put command with offset 0 + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_10k --offset 0 + $RADOS_TOOL -p $POOL get $OBJ ./rados_object_offset_0_out + cmp ./rados_object_10k ./rados_object_offset_0_out + cleanup + + # test put command with offset 1000 + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_10k --offset 1000 + $RADOS_TOOL -p $POOL get $OBJ ./rados_object_offset_1000_out + cmp ./rados_object_10k ./rados_object_offset_1000_out 0 1000 + cleanup + + rm -rf ./rados_object_10k ./rados_object_10k_out ./rados_object_offset_0_out 
./rados_object_offset_1000_out +} + +function test_stat() +{ + bluestore=$("$CEPH_TOOL" osd metadata | grep '"osd_objectstore": "bluestore"' | cut -f1) + # create file in local fs + dd if=/dev/urandom of=rados_object_128k bs=64K count=2 + + # rados df test (replicated_pool): + $RADOS_TOOL purge $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool rm $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool create $POOL 8 + $CEPH_TOOL osd pool set $POOL size 3 + + # put object with 1 MB gap in front + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k --offset=1048576 + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=1.1 + STORED_UNIT="MiB" + else + STORED=384 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? ]] && echo "") + [[ -z $IN ]] && sleep 1 && continue + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "1" ] && [ ${VALS[13]} == 128 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + [[ -z $IN ]] && die "Failed to retrieve any pool stats within 60 seconds" + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "1" ] || [ ${VALS[13]} != 128 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # overwrite data at 1MB offset + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k --offset=1048576 + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=1.1 + STORED_UNIT="MiB" + else + STORED=384 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? ]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "2" ] && [ ${VALS[13]} == 256 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "2" ] || [ ${VALS[13]} != 256 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # write data at 64K offset + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k --offset=65536 + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=1.1 + STORED_UNIT="MiB" + else + STORED=768 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? 
]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "3" ] && [ ${VALS[13]} == 384 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "3" ] || [ ${VALS[13]} != 384 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # overwrite object totally + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=128 + STORED_UNIT="KiB" + else + STORED=384 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? ]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "4" ] && [ ${VALS[13]} == 512 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "4" ] || [ ${VALS[13]} != 512 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + cleanup + + # after cleanup? + MATCH_CNT=0 + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? 
]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == 0 ] && [ ${VALS[2]} == "B" ] && [ ${VALS[3]} == "0" ] && [ ${VALS[5]} == "0" ] && [ ${VALS[12]} == "5" ] && [ ${VALS[13]} == 512 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != 0 ] || [ ${VALS[2]} != "B" ] || [ ${VALS[3]} != "0" ] || [ ${VALS[5]} != "0" ] || [ ${VALS[12]} != "5" ] || [ ${VALS[13]} != 512 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + ############ rados df test (EC pool): ############## + $RADOS_TOOL purge $POOL_EC --yes-i-really-really-mean-it + $CEPH_TOOL osd pool rm $POOL_EC $POOL_EC --yes-i-really-really-mean-it + $CEPH_TOOL osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force + $CEPH_TOOL osd pool create $POOL_EC 8 8 erasure + + # put object + $RADOS_TOOL -p $POOL_EC put $OBJ ./rados_object_128k + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=128 + STORED_UNIT="KiB" + else + STORED=192 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL_EC df | grep $POOL_EC ; [[ ! -z $? ]] && echo "") + [[ -z $IN ]] && sleep 1 && continue + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 2+1 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "1" ] && [ ${VALS[13]} == 128 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + [[ -z $IN ]] && die "Failed to retrieve any pool stats within 60 seconds" + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "1" ] || [ ${VALS[13]} != 128 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # overwrite object + $RADOS_TOOL -p $POOL_EC put $OBJ ./rados_object_128k + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=128 + STORED_UNIT="KiB" + else + STORED=192 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL_EC df | grep $POOL_EC ; [[ ! -z $? 
]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 2+1 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "2" ] && [ ${VALS[13]} == 256 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "2" ] || [ ${VALS[13]} != 256 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + cleanup + + # after cleanup? + MATCH_CNT=0 + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL_EC df | grep $POOL_EC ; [[ ! -z $? ]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 2+1 + if [ ${VALS[1]} == 0 ] && [ ${VALS[2]} == "B" ] && [ ${VALS[3]} == "0" ] && [ ${VALS[5]} == "0" ] && [ ${VALS[12]} == "3" ] && [ ${VALS[13]} == 256 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != 0 ] || [ ${VALS[2]} != "B" ] || [ ${VALS[3]} != "0" ] || [ ${VALS[5]} != "0" ] || [ ${VALS[12]} != "3" ] || [ ${VALS[13]} != 256 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + rm -rf ./rados_object_128k +} + +test_xattr +test_omap +test_rmobj +test_ls +test_cleanup +test_append +test_put +test_stat + +# clean up environment, delete pool +$CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it +$CEPH_TOOL osd pool delete $POOL_EC $POOL_EC --yes-i-really-really-mean-it +$CEPH_TOOL osd pool delete $POOL_CP_TARGET $POOL_CP_TARGET --yes-i-really-really-mean-it + +echo "SUCCESS!" +exit 0 diff --git a/qa/workunits/rados/version_number_sanity.sh b/qa/workunits/rados/version_number_sanity.sh new file mode 100755 index 000000000..e7eb9be64 --- /dev/null +++ b/qa/workunits/rados/version_number_sanity.sh @@ -0,0 +1,30 @@ +#!/bin/bash -ex +# +# test that ceph RPM/DEB package version matches "ceph --version" +# (for a loose definition of "matches") +# +source /etc/os-release +case $ID in +debian|ubuntu) + RPMDEB='DEB' + dpkg-query --show ceph-common + PKG_NAME_AND_VERSION=$(dpkg-query --show ceph-common) + ;; +centos|fedora|rhel|opensuse*|suse|sles) + RPMDEB='RPM' + rpm -q ceph + PKG_NAME_AND_VERSION=$(rpm -q ceph) + ;; +*) + echo "Unsupported distro ->$ID<-! Bailing out." 
+ exit 1 +esac +PKG_CEPH_VERSION=$(perl -e '"'"$PKG_NAME_AND_VERSION"'" =~ m/(\d+(\.\d+)+)/; print "$1\n";') +echo "According to $RPMDEB package, the ceph version under test is ->$PKG_CEPH_VERSION<-" +test -n "$PKG_CEPH_VERSION" +ceph --version +BUFFER=$(ceph --version) +CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+(\.\d+)+)/; print "$1\n";') +echo "According to \"ceph --version\", the ceph version under test is ->$CEPH_CEPH_VERSION<-" +test -n "$CEPH_CEPH_VERSION" +test "$PKG_CEPH_VERSION" = "$CEPH_CEPH_VERSION" diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh new file mode 100755 index 000000000..57279d26d --- /dev/null +++ b/qa/workunits/rbd/cli_generic.sh @@ -0,0 +1,1715 @@ +#!/usr/bin/env bash +set -ex + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +export RBD_FORCE_ALLOW_V1=1 + +# make sure rbd pool is EMPTY.. this is a test script!! +rbd ls | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." && exit 1 + +IMGS="testimg1 testimg2 testimg3 testimg4 testimg5 testimg6 testimg-diff1 testimg-diff2 testimg-diff3 foo foo2 bar bar2 test1 test2 test3 test4 clone2" + +expect_fail() { + "$@" && return 1 || return 0 +} + +tiered=0 +if ceph osd dump | grep ^pool | grep "'rbd'" | grep tier; then + tiered=1 +fi + +remove_images() { + for img in $IMGS + do + (rbd snap purge $img || true) >/dev/null 2>&1 + (rbd rm $img || true) >/dev/null 2>&1 + done +} + +test_others() { + echo "testing import, export, resize, and snapshots..." + TMP_FILES="/tmp/img1 /tmp/img1.new /tmp/img2 /tmp/img2.new /tmp/img3 /tmp/img3.new /tmp/img-diff1.new /tmp/img-diff2.new /tmp/img-diff3.new /tmp/img1.snap1 /tmp/img1.snap1 /tmp/img-diff1.snap1" + + remove_images + rm -f $TMP_FILES + + # create an image + dd if=/bin/sh of=/tmp/img1 bs=1k count=1 seek=10 + dd if=/bin/dd of=/tmp/img1 bs=1k count=10 seek=100 + dd if=/bin/rm of=/tmp/img1 bs=1k count=100 seek=1000 + dd if=/bin/ls of=/tmp/img1 bs=1k seek=10000 + dd if=/bin/ln of=/tmp/img1 bs=1k seek=100000 + + # import, snapshot + rbd import $RBD_CREATE_ARGS /tmp/img1 testimg1 + rbd resize testimg1 --size=256 --allow-shrink + rbd export testimg1 /tmp/img2 + rbd snap create testimg1 --snap=snap1 + rbd resize testimg1 --size=128 && exit 1 || true # shrink should fail + rbd resize testimg1 --size=128 --allow-shrink + rbd export testimg1 /tmp/img3 + + # info + rbd info testimg1 | grep 'size 128 MiB' + rbd info --snap=snap1 testimg1 | grep 'size 256 MiB' + + # export-diff + rm -rf /tmp/diff-testimg1-1 /tmp/diff-testimg1-2 + rbd export-diff testimg1 --snap=snap1 /tmp/diff-testimg1-1 + rbd export-diff testimg1 --from-snap=snap1 /tmp/diff-testimg1-2 + + # import-diff + rbd create $RBD_CREATE_ARGS --size=1 testimg-diff1 + rbd import-diff --sparse-size 8K /tmp/diff-testimg1-1 testimg-diff1 + rbd import-diff --sparse-size 8K /tmp/diff-testimg1-2 testimg-diff1 + + # info + rbd info testimg1 | grep 'size 128 MiB' + rbd info --snap=snap1 testimg1 | grep 'size 256 MiB' + rbd info testimg-diff1 | grep 'size 128 MiB' + rbd info --snap=snap1 testimg-diff1 | grep 'size 256 MiB' + + # make copies + rbd copy testimg1 --snap=snap1 testimg2 + rbd copy testimg1 testimg3 + rbd copy testimg-diff1 --sparse-size 768K --snap=snap1 testimg-diff2 + rbd copy testimg-diff1 --sparse-size 768K testimg-diff3 + + # verify the result + rbd info testimg2 | grep 'size 256 MiB' + rbd info testimg3 | grep 'size 128 MiB' + rbd info testimg-diff2 | grep 'size 256 MiB' + rbd info testimg-diff3 | grep 'size 128 
MiB' + + # deep copies + rbd deep copy testimg1 testimg4 + rbd deep copy testimg1 --snap=snap1 testimg5 + rbd info testimg4 | grep 'size 128 MiB' + rbd info testimg5 | grep 'size 256 MiB' + rbd snap ls testimg4 | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls testimg4 | grep '.*snap1.*' + + rbd export testimg1 /tmp/img1.new + rbd export testimg2 /tmp/img2.new + rbd export testimg3 /tmp/img3.new + rbd export testimg-diff1 /tmp/img-diff1.new + rbd export testimg-diff2 /tmp/img-diff2.new + rbd export testimg-diff3 /tmp/img-diff3.new + + cmp /tmp/img2 /tmp/img2.new + cmp /tmp/img3 /tmp/img3.new + cmp /tmp/img2 /tmp/img-diff2.new + cmp /tmp/img3 /tmp/img-diff3.new + + # rollback + rbd snap rollback --snap=snap1 testimg1 + rbd snap rollback --snap=snap1 testimg-diff1 + rbd info testimg1 | grep 'size 256 MiB' + rbd info testimg-diff1 | grep 'size 256 MiB' + rbd export testimg1 /tmp/img1.snap1 + rbd export testimg-diff1 /tmp/img-diff1.snap1 + cmp /tmp/img2 /tmp/img1.snap1 + cmp /tmp/img2 /tmp/img-diff1.snap1 + + # test create, copy of zero-length images + rbd rm testimg2 + rbd rm testimg3 + rbd create testimg2 -s 0 + rbd cp testimg2 testimg3 + rbd deep cp testimg2 testimg6 + + # remove snapshots + rbd snap rm --snap=snap1 testimg1 + rbd snap rm --snap=snap1 testimg-diff1 + rbd info --snap=snap1 testimg1 2>&1 | grep 'error setting snapshot context: (2) No such file or directory' + rbd info --snap=snap1 testimg-diff1 2>&1 | grep 'error setting snapshot context: (2) No such file or directory' + + # sparsify + rbd sparsify testimg1 + + remove_images + rm -f $TMP_FILES +} + +test_rename() { + echo "testing rename..." + remove_images + + rbd create --image-format 1 -s 1 foo + rbd create --image-format 2 -s 1 bar + rbd rename foo foo2 + rbd rename foo2 bar 2>&1 | grep exists + rbd rename bar bar2 + rbd rename bar2 foo2 2>&1 | grep exists + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd create -p rbd2 -s 1 foo + rbd rename rbd2/foo rbd2/bar + rbd -p rbd2 ls | grep bar + rbd rename rbd2/bar foo + rbd rename --pool rbd2 foo bar + ! rbd rename rbd2/bar --dest-pool rbd foo + rbd rename --pool rbd2 bar --dest-pool rbd2 foo + rbd -p rbd2 ls | grep foo + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it + + remove_images +} + +test_ls() { + echo "testing ls..." 
+ remove_images + + rbd create --image-format 1 -s 1 test1 + rbd create --image-format 1 -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + # look for fields in output of ls -l without worrying about space + rbd ls -l | grep 'test1.*1 MiB.*1' + rbd ls -l | grep 'test2.*1 MiB.*1' + + rbd rm test1 + rbd rm test2 + + rbd create --image-format 2 -s 1 test1 + rbd create --image-format 2 -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + rbd ls -l | grep 'test1.*1 MiB.*2' + rbd ls -l | grep 'test2.*1 MiB.*2' + + rbd rm test1 + rbd rm test2 + + rbd create --image-format 2 -s 1 test1 + rbd create --image-format 1 -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + rbd ls -l | grep 'test1.*1 MiB.*2' + rbd ls -l | grep 'test2.*1 MiB.*1' + remove_images + + # test that many images can be shown by ls + for i in $(seq -w 00 99); do + rbd create image.$i -s 1 + done + rbd ls | wc -l | grep 100 + rbd ls -l | grep image | wc -l | grep 100 + for i in $(seq -w 00 99); do + rbd rm image.$i + done + + for i in $(seq -w 00 99); do + rbd create image.$i --image-format 2 -s 1 + done + rbd ls | wc -l | grep 100 + rbd ls -l | grep image | wc -l | grep 100 + for i in $(seq -w 00 99); do + rbd rm image.$i + done +} + +test_remove() { + echo "testing remove..." + remove_images + + rbd remove "NOT_EXIST" && exit 1 || true # remove should fail + rbd create --image-format 1 -s 1 test1 + rbd rm test1 + rbd ls | wc -l | grep "^0$" + + rbd create --image-format 2 -s 1 test2 + rbd rm test2 + rbd ls | wc -l | grep "^0$" + + # check that remove succeeds even if it's + # interrupted partway through. simulate this + # by removing some objects manually. + + # remove with header missing (old format) + rbd create --image-format 1 -s 1 test1 + rados rm -p rbd test1.rbd + rbd rm test1 + rbd ls | wc -l | grep "^0$" + + if [ $tiered -eq 0 ]; then + # remove with header missing + rbd create --image-format 2 -s 1 test2 + HEADER=$(rados -p rbd ls | grep '^rbd_header') + rados -p rbd rm $HEADER + rbd rm test2 + rbd ls | wc -l | grep "^0$" + + # remove with id missing + rbd create --image-format 2 -s 1 test2 + rados -p rbd rm rbd_id.test2 + rbd rm test2 + rbd ls | wc -l | grep "^0$" + + # remove with header and id missing + rbd create --image-format 2 -s 1 test2 + HEADER=$(rados -p rbd ls | grep '^rbd_header') + rados -p rbd rm $HEADER + rados -p rbd rm rbd_id.test2 + rbd rm test2 + rbd ls | wc -l | grep "^0$" + fi + + # remove with rbd_children object missing (and, by extension, + # with child not mentioned in rbd_children) + rbd create --image-format 2 -s 1 test2 + rbd snap create test2@snap + rbd snap protect test2@snap + rbd clone test2@snap clone --rbd-default-clone-format 1 + + rados -p rbd rm rbd_children + rbd rm clone + rbd ls | grep clone | wc -l | grep '^0$' + + rbd snap unprotect test2@snap + rbd snap rm test2@snap + rbd rm test2 +} + +test_locking() { + echo "testing locking..." 
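+ # Advisory lock round trip exercised below (the locker id is taken from the
+ # 'rbd lock list' output; values are illustrative):
+ #   rbd lock add <image> <id> [--shared <tag>]
+ #   rbd lock list <image>
+ #   rbd lock remove <image> <id> <locker>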
+ remove_images + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd lock list test1 | wc -l | grep '^0$' + rbd lock add test1 id + rbd lock list test1 | grep ' 1 ' + LOCKER=$(rbd lock list test1 | tail -n 1 | awk '{print $1;}') + rbd lock remove test1 id $LOCKER + rbd lock list test1 | wc -l | grep '^0$' + + rbd lock add test1 id --shared tag + rbd lock list test1 | grep ' 1 ' + rbd lock add test1 id --shared tag + rbd lock list test1 | grep ' 2 ' + rbd lock add test1 id2 --shared tag + rbd lock list test1 | grep ' 3 ' + rbd lock list test1 | tail -n 1 | awk '{print $2, $1;}' | xargs rbd lock remove test1 + if rbd info test1 | grep -qE "features:.*exclusive" + then + # new locking functionality requires all locks to be released + while [ -n "$(rbd lock list test1)" ] + do + rbd lock list test1 | tail -n 1 | awk '{print $2, $1;}' | xargs rbd lock remove test1 + done + fi + rbd rm test1 +} + +test_pool_image_args() { + echo "testing pool and image args..." + remove_images + + ceph osd pool delete test test --yes-i-really-really-mean-it || true + ceph osd pool create test 32 + rbd pool init test + truncate -s 1 /tmp/empty /tmp/empty@snap + + rbd ls | wc -l | grep 0 + rbd create -s 1 test1 + rbd ls | grep -q test1 + rbd import --image test2 /tmp/empty + rbd ls | grep -q test2 + rbd --dest test3 import /tmp/empty + rbd ls | grep -q test3 + rbd import /tmp/empty foo + rbd ls | grep -q foo + + # should fail due to "destination snapname specified" + rbd import --dest test/empty@snap /tmp/empty && exit 1 || true + rbd import /tmp/empty test/empty@snap && exit 1 || true + rbd import --image test/empty@snap /tmp/empty && exit 1 || true + rbd import /tmp/empty@snap && exit 1 || true + + rbd ls test | wc -l | grep 0 + rbd import /tmp/empty test/test1 + rbd ls test | grep -q test1 + rbd -p test import /tmp/empty test2 + rbd ls test | grep -q test2 + rbd --image test3 -p test import /tmp/empty + rbd ls test | grep -q test3 + rbd --image test4 -p test import /tmp/empty + rbd ls test | grep -q test4 + rbd --dest test5 -p test import /tmp/empty + rbd ls test | grep -q test5 + rbd --dest test6 --dest-pool test import /tmp/empty + rbd ls test | grep -q test6 + rbd --image test7 --dest-pool test import /tmp/empty + rbd ls test | grep -q test7 + rbd --image test/test8 import /tmp/empty + rbd ls test | grep -q test8 + rbd --dest test/test9 import /tmp/empty + rbd ls test | grep -q test9 + rbd import --pool test /tmp/empty + rbd ls test | grep -q empty + + # copy with no explicit pool goes to pool rbd + rbd copy test/test9 test10 + rbd ls test | grep -qv test10 + rbd ls | grep -q test10 + rbd copy test/test9 test/test10 + rbd ls test | grep -q test10 + rbd copy --pool test test10 --dest-pool test test11 + rbd ls test | grep -q test11 + rbd copy --dest-pool rbd --pool test test11 test12 + rbd ls | grep test12 + rbd ls test | grep -qv test12 + + rm -f /tmp/empty /tmp/empty@snap + ceph osd pool delete test test --yes-i-really-really-mean-it + + for f in foo test1 test10 test12 test2 test3 ; do + rbd rm $f + done +} + +test_clone() { + echo "testing clone..." 
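+ # The same source file is imported repeatedly below only to prove that the
+ # pool/image pair can be given in several equivalent ways: as 'pool/image',
+ # via -p/--pool plus an image name, or via --image/--dest/--dest-pool, and
+ # that copy falls back to the 'rbd' pool when no pool is given.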
+ remove_images + rbd create test1 $RBD_CREATE_ARGS -s 1 + rbd snap create test1@s1 + rbd snap protect test1@s1 + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd clone test1@s1 rbd2/clone + rbd -p rbd2 ls | grep clone + rbd -p rbd2 ls -l | grep clone | grep test1@s1 + rbd ls | grep -v clone + rbd flatten rbd2/clone + rbd snap create rbd2/clone@s1 + rbd snap protect rbd2/clone@s1 + rbd clone rbd2/clone@s1 clone2 + rbd ls | grep clone2 + rbd ls -l | grep clone2 | grep rbd2/clone@s1 + rbd -p rbd2 ls | grep -v clone2 + + rbd rm clone2 + rbd snap unprotect rbd2/clone@s1 + rbd snap rm rbd2/clone@s1 + rbd rm rbd2/clone + rbd snap unprotect test1@s1 + rbd snap rm test1@s1 + rbd rm test1 + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_trash() { + echo "testing trash..." + remove_images + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd create $RBD_CREATE_ARGS -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + rbd ls -l | grep 'test1.*2.*' + rbd ls -l | grep 'test2.*2.*' + + rbd trash mv test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 1 + rbd ls -l | grep 'test2.*2.*' + + rbd trash ls | grep test1 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test1.*USER.*' + rbd trash ls -l | grep -v 'protected until' + + ID=`rbd trash ls | cut -d ' ' -f 1` + rbd trash rm $ID + + rbd trash mv test2 + ID=`rbd trash ls | cut -d ' ' -f 1` + rbd info --image-id $ID | grep "rbd image 'test2'" + + rbd trash restore $ID + rbd ls | grep test2 + rbd ls | wc -l | grep 1 + rbd ls -l | grep 'test2.*2.*' + + rbd trash mv test2 --expires-at "3600 sec" + rbd trash ls | grep test2 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test2.*USER.*protected until' + + rbd trash rm $ID 2>&1 | grep 'Deferment time has not expired' + rbd trash rm --image-id $ID --force + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd snap create test1@snap1 + rbd snap protect test1@snap1 + rbd trash mv test1 + + rbd trash ls | grep test1 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test1.*USER.*' + rbd trash ls -l | grep -v 'protected until' + + ID=`rbd trash ls | cut -d ' ' -f 1` + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls --image-id $ID | grep '.*snap1.*' + + rbd snap unprotect --image-id $ID --snap snap1 + rbd snap rm --image-id $ID --snap snap1 + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0 + + rbd trash restore $ID + rbd snap create test1@snap1 + rbd snap create test1@snap2 + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 2 + rbd snap purge --image-id $ID + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0 + + rbd rm --rbd_move_to_trash_on_remove=true --rbd_move_to_trash_on_remove_expire_seconds=3600 test1 + rbd trash ls | grep test1 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test1.*USER.*protected until' + rbd trash rm $ID 2>&1 | grep 'Deferment time has not expired' + rbd trash rm --image-id $ID --force + + remove_images +} + +test_purge() { + echo "testing trash purge..." 
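+ # 'rbd trash purge' only removes images whose deferment time has expired and
+ # that are not pinned; images that still have snapshots or linked clones stay
+ # behind and the command warns 'some expired images could not be removed'.
+ # --expired-before limits the purge to images whose deferment ends before the
+ # given time.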
+ remove_images + + rbd trash ls | wc -l | grep 0 + rbd trash purge + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash ls | wc -l | grep 2 + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd trash mv testimg1 --expires-at "1 hour" + rbd trash mv testimg2 --expires-at "3 hours" + rbd trash ls | wc -l | grep 2 + rbd trash purge + rbd trash ls | wc -l | grep 2 + rbd trash purge --expired-before "now + 2 hours" + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg2 + rbd trash purge --expired-before "now + 4 hours" + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap # pin testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash ls | wc -l | grep 3 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg1 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd snap create testimg2@snap # pin testimg2 + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash ls | wc -l | grep 3 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg2 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg3@snap # pin testimg3 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash ls | wc -l | grep 3 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg3 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + # test purging a clone with a chain of parents + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash mv testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 2 + rbd trash ls | 
grep testimg1 + rbd trash ls | grep testimg2 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 3 + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg3@snap # pin testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash mv testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 5 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg3 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg3 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + # test purging a clone with a chain of auto-delete parents + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg1 + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg2 + rbd trash mv testimg3 + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 2 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 3 + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg3@snap # pin testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg1 + rbd rm 
--rbd_move_parent_to_trash_on_remove=true testimg2 + rbd trash mv testimg3 + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 5 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg3 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg3 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 +} + +test_deep_copy_clone() { + echo "testing deep copy clone..." + remove_images + + rbd create testimg1 $RBD_CREATE_ARGS --size 256 + rbd snap create testimg1 --snap=snap1 + rbd snap protect testimg1@snap1 + rbd clone testimg1@snap1 testimg2 + rbd snap create testimg2@snap2 + rbd deep copy testimg2 testimg3 + rbd info testimg3 | grep 'size 256 MiB' + rbd info testimg3 | grep 'parent: rbd/testimg1@snap1' + rbd snap ls testimg3 | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls testimg3 | grep '.*snap2.*' + rbd info testimg2 | grep 'features:.*deep-flatten' || rbd snap rm testimg2@snap2 + rbd info testimg3 | grep 'features:.*deep-flatten' || rbd snap rm testimg3@snap2 + rbd flatten testimg2 + rbd flatten testimg3 + rbd snap unprotect testimg1@snap1 + rbd snap purge testimg2 + rbd snap purge testimg3 + rbd rm testimg2 + rbd rm testimg3 + + rbd snap protect testimg1@snap1 + rbd clone testimg1@snap1 testimg2 + rbd snap create testimg2@snap2 + rbd deep copy --flatten testimg2 testimg3 + rbd info testimg3 | grep 'size 256 MiB' + rbd info testimg3 | grep -v 'parent:' + rbd snap ls testimg3 | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls testimg3 | grep '.*snap2.*' + rbd info testimg2 | grep 'features:.*deep-flatten' || rbd snap rm testimg2@snap2 + rbd flatten testimg2 + rbd snap unprotect testimg1@snap1 + + remove_images +} + +test_clone_v2() { + echo "testing clone v2..." 
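+ # Clone format 1 requires the parent snapshot to be protected first, while
+ # format 2 (--rbd-default-clone-format=2) does not; removing a parent snapshot
+ # that still has v2 children moves it to the trash, which shows up as
+ # 'trash (1)' in 'rbd snap list --all'.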
+ remove_images + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd snap create test1@1 + rbd clone --rbd-default-clone-format=1 test1@1 test2 && exit 1 || true + rbd clone --rbd-default-clone-format=2 test1@1 test2 + rbd clone --rbd-default-clone-format=2 test1@1 test3 + + rbd snap protect test1@1 + rbd clone --rbd-default-clone-format=1 test1@1 test4 + + rbd children test1@1 | sort | tr '\n' ' ' | grep -E "test2.*test3.*test4" + rbd children --descendants test1 | sort | tr '\n' ' ' | grep -E "test2.*test3.*test4" + + rbd remove test4 + rbd snap unprotect test1@1 + + rbd snap remove test1@1 + rbd snap list --all test1 | grep -E "trash \(1\) *$" + + rbd snap create test1@2 + rbd rm test1 2>&1 | grep 'image has snapshots' + + rbd snap rm test1@2 + rbd rm test1 2>&1 | grep 'linked clones' + + rbd rm test3 + rbd rm test1 2>&1 | grep 'linked clones' + + rbd flatten test2 + rbd snap list --all test1 | wc -l | grep '^0$' + rbd rm test1 + rbd rm test2 + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd snap create test1@1 + rbd snap create test1@2 + rbd clone test1@1 test2 --rbd-default-clone-format 2 + rbd clone test1@2 test3 --rbd-default-clone-format 2 + rbd snap rm test1@1 + rbd snap rm test1@2 + expect_fail rbd rm test1 + rbd rm test1 --rbd-move-parent-to-trash-on-remove=true + rbd trash ls -a | grep test1 + rbd rm test2 + rbd trash ls -a | grep test1 + rbd rm test3 + rbd trash ls -a | expect_fail grep test1 +} + +test_thick_provision() { + echo "testing thick provision..." + remove_images + + # Try to create small and large thick-pro image and + # check actual size. (64M and 4G) + + # Small thick-pro image test + rbd create $RBD_CREATE_ARGS --thick-provision -s 64M test1 + count=0 + ret="" + while [ $count -lt 10 ] + do + rbd du|grep test1|tr -s " "|cut -d " " -f 4-5|grep '^64 MiB' && ret=$? + if [ "$ret" = "0" ] + then + break; + fi + count=`expr $count + 1` + sleep 2 + done + rbd du + if [ "$ret" != "0" ] + then + exit 1 + fi + rbd rm test1 + rbd ls | grep test1 | wc -l | grep '^0$' + + # Large thick-pro image test + rbd create $RBD_CREATE_ARGS --thick-provision -s 4G test1 + count=0 + ret="" + while [ $count -lt 10 ] + do + rbd du|grep test1|tr -s " "|cut -d " " -f 4-5|grep '^4 GiB' && ret=$? + if [ "$ret" = "0" ] + then + break; + fi + count=`expr $count + 1` + sleep 2 + done + rbd du + if [ "$ret" != "0" ] + then + exit 1 + fi + rbd rm test1 + rbd ls | grep test1 | wc -l | grep '^0$' +} + +test_namespace() { + echo "testing namespace..." 
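+ # Images in a namespace are addressed as pool/namespace/image (or with
+ # --pool/--namespace). As exercised below, format 2 clones may cross
+ # namespaces within the pool, format 1 clones only work inside a single
+ # namespace, and a namespace cannot be removed while it still holds images.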
+ remove_images + + rbd namespace ls | wc -l | grep '^0$' + rbd namespace create rbd/test1 + rbd namespace create --pool rbd --namespace test2 + rbd namespace create --namespace test3 + expect_fail rbd namespace create rbd/test3 + + rbd namespace list | grep 'test' | wc -l | grep '^3$' + + expect_fail rbd namespace remove --pool rbd missing + + rbd create $RBD_CREATE_ARGS --size 1G rbd/test1/image1 + + # default test1 ns to test2 ns clone + rbd bench --io-type write --io-pattern rand --io-total 32M --io-size 4K rbd/test1/image1 + rbd snap create rbd/test1/image1@1 + rbd clone --rbd-default-clone-format 2 rbd/test1/image1@1 rbd/test2/image1 + rbd snap rm rbd/test1/image1@1 + cmp <(rbd export rbd/test1/image1 -) <(rbd export rbd/test2/image1 -) + rbd rm rbd/test2/image1 + + # default ns to test1 ns clone + rbd create $RBD_CREATE_ARGS --size 1G rbd/image2 + rbd bench --io-type write --io-pattern rand --io-total 32M --io-size 4K rbd/image2 + rbd snap create rbd/image2@1 + rbd clone --rbd-default-clone-format 2 rbd/image2@1 rbd/test2/image2 + rbd snap rm rbd/image2@1 + cmp <(rbd export rbd/image2 -) <(rbd export rbd/test2/image2 -) + expect_fail rbd rm rbd/image2 + rbd rm rbd/test2/image2 + rbd rm rbd/image2 + + # v1 clones are supported within the same namespace + rbd create $RBD_CREATE_ARGS --size 1G rbd/test1/image3 + rbd snap create rbd/test1/image3@1 + rbd snap protect rbd/test1/image3@1 + rbd clone --rbd-default-clone-format 1 rbd/test1/image3@1 rbd/test1/image4 + rbd rm rbd/test1/image4 + rbd snap unprotect rbd/test1/image3@1 + rbd snap rm rbd/test1/image3@1 + rbd rm rbd/test1/image3 + + rbd create $RBD_CREATE_ARGS --size 1G --namespace test1 image2 + expect_fail rbd namespace remove rbd/test1 + + rbd group create rbd/test1/group1 + rbd group image add rbd/test1/group1 rbd/test1/image1 + rbd group rm rbd/test1/group1 + + rbd trash move rbd/test1/image1 + ID=`rbd trash --namespace test1 ls | cut -d ' ' -f 1` + rbd trash rm rbd/test1/${ID} + + rbd remove rbd/test1/image2 + + rbd namespace remove --pool rbd --namespace test1 + rbd namespace remove --namespace test3 + + rbd namespace list | grep 'test' | wc -l | grep '^1$' + rbd namespace remove rbd/test2 +} + +get_migration_state() { + local image=$1 + + rbd --format xml status $image | + $XMLSTARLET sel -t -v '//status/migration/state' +} + +test_migration() { + echo "testing migration..." 
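+ # Live migration walks prepare -> execute -> commit; until commit the
+ # migration can be rolled back with 'rbd migration abort'. The current phase
+ # is read back via get_migration_state() above, which parses the XML output
+ # of 'rbd status'.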
+ remove_images + ceph osd pool create rbd2 8 + rbd pool init rbd2 + + # Convert to new format + rbd create --image-format 1 -s 128M test1 + rbd info test1 | grep 'format: 1' + rbd migration prepare test1 --image-format 2 + test "$(get_migration_state test1)" = prepared + rbd info test1 | grep 'format: 2' + rbd rm test1 && exit 1 || true + rbd migration execute test1 + test "$(get_migration_state test1)" = executed + rbd migration commit test1 + get_migration_state test1 && exit 1 || true + + # Enable layering (and some other features) + rbd info test1 | grep 'features: .*layering' && exit 1 || true + rbd migration prepare test1 --image-feature \ + layering,exclusive-lock,object-map,fast-diff,deep-flatten + rbd info test1 | grep 'features: .*layering' + rbd migration execute test1 + rbd migration commit test1 + + # Migration to other pool + rbd migration prepare test1 rbd2/test1 + test "$(get_migration_state rbd2/test1)" = prepared + rbd ls | wc -l | grep '^0$' + rbd -p rbd2 ls | grep test1 + rbd migration execute test1 + test "$(get_migration_state rbd2/test1)" = executed + rbd rm rbd2/test1 && exit 1 || true + rbd migration commit test1 + + # Migration to other namespace + rbd namespace create rbd2/ns1 + rbd namespace create rbd2/ns2 + rbd migration prepare rbd2/test1 rbd2/ns1/test1 + test "$(get_migration_state rbd2/ns1/test1)" = prepared + rbd migration execute rbd2/test1 + test "$(get_migration_state rbd2/ns1/test1)" = executed + rbd migration commit rbd2/test1 + rbd migration prepare rbd2/ns1/test1 rbd2/ns2/test1 + rbd migration execute rbd2/ns2/test1 + rbd migration commit rbd2/ns2/test1 + + # Enable data pool + rbd create -s 128M test1 + rbd migration prepare test1 --data-pool rbd2 + rbd info test1 | grep 'data_pool: rbd2' + rbd migration execute test1 + rbd migration commit test1 + + # testing trash + rbd migration prepare test1 + expect_fail rbd trash mv test1 + ID=`rbd trash ls -a | cut -d ' ' -f 1` + expect_fail rbd trash rm $ID + expect_fail rbd trash restore $ID + rbd migration abort test1 + + # Migrate parent + rbd remove test1 + dd if=/dev/urandom bs=1M count=1 | rbd --image-format 2 import - test1 + md5sum=$(rbd export test1 - | md5sum) + rbd snap create test1@snap1 + rbd snap protect test1@snap1 + rbd snap create test1@snap2 + rbd clone test1@snap1 clone_v1 --rbd_default_clone_format=1 + rbd clone test1@snap2 clone_v2 --rbd_default_clone_format=2 + rbd info clone_v1 | fgrep 'parent: rbd/test1@snap1' + rbd info clone_v2 | fgrep 'parent: rbd/test1@snap2' + rbd info clone_v2 |grep 'op_features: clone-child' + test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}" + test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}" + test "$(rbd children test1@snap1)" = "rbd/clone_v1" + test "$(rbd children test1@snap2)" = "rbd/clone_v2" + rbd migration prepare test1 rbd2/test2 + rbd info clone_v1 | fgrep 'parent: rbd2/test2@snap1' + rbd info clone_v2 | fgrep 'parent: rbd2/test2@snap2' + rbd info clone_v2 | fgrep 'op_features: clone-child' + test "$(rbd children rbd2/test2@snap1)" = "rbd/clone_v1" + test "$(rbd children rbd2/test2@snap2)" = "rbd/clone_v2" + rbd migration execute test1 + expect_fail rbd migration commit test1 + rbd migration commit test1 --force + test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}" + test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}" + rbd migration prepare rbd2/test2 test1 + rbd info clone_v1 | fgrep 'parent: rbd/test1@snap1' + rbd info clone_v2 | fgrep 'parent: rbd/test1@snap2' + rbd info clone_v2 | fgrep 'op_features: clone-child' + test "$(rbd 
children test1@snap1)" = "rbd/clone_v1" + test "$(rbd children test1@snap2)" = "rbd/clone_v2" + rbd migration execute test1 + expect_fail rbd migration commit test1 + rbd migration commit test1 --force + test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}" + test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}" + rbd remove clone_v1 + rbd remove clone_v2 + rbd snap unprotect test1@snap1 + rbd snap purge test1 + rbd rm test1 + + for format in 1 2; do + # Abort migration after successful prepare + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 --data-pool rbd2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # Abort migration after successful execute + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 --data-pool rbd2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd migration execute test2 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # Migration is automatically aborted if prepare failed + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 --data-pool INVALID_DATA_POOL && exit 1 || true + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # Abort migration to other pool + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 rbd2/test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 rbd2/test2 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # The same but abort using destination image + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 rbd2/test2 + rbd migration abort rbd2/test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + test $format = 1 && continue + + # Abort migration to other namespace + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 rbd2/ns1/test3 + rbd bench --io-type write --io-size 1024 --io-total 1024 rbd2/ns1/test3 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + done + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_config() { + echo "testing config..." 
+ remove_images + + expect_fail rbd config global set osd rbd_cache true + expect_fail rbd config global set global debug_ms 10 + expect_fail rbd config global set global rbd_UNKNOWN false + expect_fail rbd config global set global rbd_cache INVALID + rbd config global set global rbd_cache false + rbd config global set client rbd_cache true + rbd config global set client.123 rbd_cache false + rbd config global get global rbd_cache | grep '^false$' + rbd config global get client rbd_cache | grep '^true$' + rbd config global get client.123 rbd_cache | grep '^false$' + expect_fail rbd config global get client.UNKNOWN rbd_cache + rbd config global list global | grep '^rbd_cache * false * global *$' + rbd config global list client | grep '^rbd_cache * true * client *$' + rbd config global list client.123 | grep '^rbd_cache * false * client.123 *$' + rbd config global list client.UNKNOWN | grep '^rbd_cache * true * client *$' + rbd config global rm client rbd_cache + expect_fail rbd config global get client rbd_cache + rbd config global list client | grep '^rbd_cache * false * global *$' + rbd config global rm client.123 rbd_cache + rbd config global rm global rbd_cache + + rbd config pool set rbd rbd_cache true + rbd config pool list rbd | grep '^rbd_cache * true * pool *$' + rbd config pool get rbd rbd_cache | grep '^true$' + + rbd create $RBD_CREATE_ARGS -s 1 test1 + + rbd config image list rbd/test1 | grep '^rbd_cache * true * pool *$' + rbd config image set rbd/test1 rbd_cache false + rbd config image list rbd/test1 | grep '^rbd_cache * false * image *$' + rbd config image get rbd/test1 rbd_cache | grep '^false$' + rbd config image remove rbd/test1 rbd_cache + expect_fail rbd config image get rbd/test1 rbd_cache + rbd config image list rbd/test1 | grep '^rbd_cache * true * pool *$' + + rbd config pool remove rbd rbd_cache + expect_fail rbd config pool get rbd rbd_cache + rbd config pool list rbd | grep '^rbd_cache * true * config *$' + + rbd rm test1 +} + +test_trash_purge_schedule() { + echo "testing trash purge schedule..." 
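+ # Background for the checks below: trash purge schedules are handled by the
+ # mgr rbd_support module and can be added at the cluster, pool or namespace
+ # level; 'ls -R' lists them recursively while 'status' shows what the module
+ # has actually picked up, hence the polling loops with 'sleep' further down.
+ # Illustrative sketch with a hypothetical pool name, not part of the test flow:
+ #   rbd trash purge schedule add -p mypool 1d 01:30   # purge daily at 01:30
+ #   rbd trash purge schedule ls -p mypool -R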
+ remove_images + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd namespace create rbd2/ns1 + + test "$(ceph rbd trash purge schedule list)" = "{}" + ceph rbd trash purge schedule status | fgrep '"scheduled": []' + + expect_fail rbd trash purge schedule ls + test "$(rbd trash purge schedule ls -R --format json)" = "[]" + + rbd trash purge schedule add -p rbd 1d 01:30 + + rbd trash purge schedule ls -p rbd | grep 'every 1d starting at 01:30' + expect_fail rbd trash purge schedule ls + rbd trash purge schedule ls -R | grep 'every 1d starting at 01:30' + rbd trash purge schedule ls -R -p rbd | grep 'every 1d starting at 01:30' + expect_fail rbd trash purge schedule ls -p rbd2 + test "$(rbd trash purge schedule ls -p rbd2 -R --format json)" = "[]" + + rbd trash purge schedule add -p rbd2/ns1 2d + test "$(rbd trash purge schedule ls -p rbd2 -R --format json)" != "[]" + rbd trash purge schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *every 2d' + rbd trash purge schedule rm -p rbd2/ns1 + test "$(rbd trash purge schedule ls -p rbd2 -R --format json)" = "[]" + + for i in `seq 12`; do + test "$(rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' && break + sleep 10 + done + rbd trash purge schedule status + test "$(rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + test "$(rbd trash purge schedule status -p rbd --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + + rbd trash purge schedule add 2d 00:17 + rbd trash purge schedule ls | grep 'every 2d starting at 00:17' + rbd trash purge schedule ls -R | grep 'every 2d starting at 00:17' + expect_fail rbd trash purge schedule ls -p rbd2 + rbd trash purge schedule ls -p rbd2 -R | grep 'every 2d starting at 00:17' + rbd trash purge schedule ls -p rbd2/ns1 -R | grep 'every 2d starting at 00:17' + test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/pool')" = "-" + test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/namespace')" = "-" + test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/items/item/start_time')" = "00:17:00" + + for i in `seq 12`; do + rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool' | grep 'rbd2' && break + sleep 10 + done + rbd trash purge schedule status + rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool' | grep 'rbd2' + echo $(rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool') | grep 'rbd rbd2 rbd2' + test "$(rbd trash purge schedule status -p rbd --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + test "$(echo $(rbd trash purge schedule status -p rbd2 --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool'))" = 'rbd2 rbd2' + + test "$(echo $(rbd trash purge schedule ls -R --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/items'))" = "2d00:17:00 1d01:30:00" + + rbd trash purge schedule add 1d + rbd trash purge schedule ls | grep 'every 2d starting at 00:17' + rbd trash purge schedule ls | grep 'every 1d' + + rbd trash purge schedule ls -R --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/items' | grep '2d00:17' + + rbd trash purge schedule rm 1d + rbd trash purge schedule ls | grep 'every 2d starting at 00:17' + rbd trash purge schedule rm 2d 
00:17 + expect_fail rbd trash purge schedule ls + + for p in rbd2 rbd2/ns1; do + rbd create $RBD_CREATE_ARGS -s 1 rbd2/ns1/test1 + rbd trash mv rbd2/ns1/test1 + rbd trash ls rbd2/ns1 | wc -l | grep '^1$' + + rbd trash purge schedule add -p $p 1m + rbd trash purge schedule list -p rbd2 -R | grep 'every 1m' + rbd trash purge schedule list -p rbd2/ns1 -R | grep 'every 1m' + + for i in `seq 12`; do + rbd trash ls rbd2/ns1 | wc -l | grep '^1$' || break + sleep 10 + done + rbd trash ls rbd2/ns1 | wc -l | grep '^0$' + + # repeat with kicked in schedule, see https://tracker.ceph.com/issues/53915 + rbd trash purge schedule list -p rbd2 -R | grep 'every 1m' + rbd trash purge schedule list -p rbd2/ns1 -R | grep 'every 1m' + + rbd trash purge schedule status | grep 'rbd2 *ns1' + rbd trash purge schedule status -p rbd2 | grep 'rbd2 *ns1' + rbd trash purge schedule status -p rbd2/ns1 | grep 'rbd2 *ns1' + + rbd trash purge schedule rm -p $p 1m + done + + # Negative tests + rbd trash purge schedule add 2m + expect_fail rbd trash purge schedule add -p rbd dummy + expect_fail rbd trash purge schedule add dummy + expect_fail rbd trash purge schedule remove -p rbd dummy + expect_fail rbd trash purge schedule remove dummy + rbd trash purge schedule ls -p rbd | grep 'every 1d starting at 01:30' + rbd trash purge schedule ls | grep 'every 2m' + rbd trash purge schedule remove -p rbd 1d 01:30 + rbd trash purge schedule remove 2m + test "$(rbd trash purge schedule ls -R --format json)" = "[]" + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_trash_purge_schedule_recovery() { + echo "testing recovery of trash_purge_schedule handler after module's RADOS client is blocklisted..." + remove_images + ceph osd pool create rbd3 8 + rbd pool init rbd3 + rbd namespace create rbd3/ns1 + + rbd trash purge schedule add -p rbd3/ns1 2d + rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d' + + # Fetch and blocklist the rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + # Check that you can add a trash purge schedule after a few retries + expect_fail rbd trash purge schedule add -p rbd3 10m + sleep 10 + for i in `seq 24`; do + rbd trash purge schedule add -p rbd3 10m && break + sleep 10 + done + + rbd trash purge schedule ls -p rbd3 -R | grep 'every 10m' + # Verify that the schedule present before client blocklisting is preserved + rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d' + + rbd trash purge schedule remove -p rbd3 10m + rbd trash purge schedule remove -p rbd3/ns1 2d + rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'every 10m' + rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'rbd3 *ns1 *every 2d' + + ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it + +} + +test_mirror_snapshot_schedule() { + echo "testing mirror snapshot schedule..." 
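+ # Background for the checks below: with snapshot-based mirroring the mgr
+ # rbd_support module creates mirror snapshots at the scheduled interval, so
+ # the test counts 'mirror.primary' entries in the image status to confirm
+ # the scheduler is running and expects it to stop once the image is demoted
+ # or removed. Illustrative sketch with hypothetical names, not part of the
+ # test flow:
+ #   rbd mirror image enable mypool/myimg snapshot
+ #   rbd mirror snapshot schedule add -p mypool --image myimg 1m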
+ remove_images + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd namespace create rbd2/ns1 + + rbd mirror pool enable rbd2 image + rbd mirror pool enable rbd2/ns1 image + rbd mirror pool peer add rbd2 cluster1 + + test "$(ceph rbd mirror snapshot schedule list)" = "{}" + ceph rbd mirror snapshot schedule status | fgrep '"scheduled_images": []' + + expect_fail rbd mirror snapshot schedule ls + test "$(rbd mirror snapshot schedule ls -R --format json)" = "[]" + + rbd create $RBD_CREATE_ARGS -s 1 rbd2/ns1/test1 + + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" = '0' + + rbd mirror image enable rbd2/ns1/test1 snapshot + + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" = '1' + + rbd mirror snapshot schedule add -p rbd2/ns1 --image test1 1m + expect_fail rbd mirror snapshot schedule ls + rbd mirror snapshot schedule ls -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2 + rbd mirror snapshot schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2/ns1 + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'rbd2 *ns1 *test1 *every 1m' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + for i in `seq 12`; do + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" -gt '1' && break + sleep 10 + done + + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" -gt '1' + + # repeat with kicked in schedule, see https://tracker.ceph.com/issues/53915 + expect_fail rbd mirror snapshot schedule ls + rbd mirror snapshot schedule ls -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2 + rbd mirror snapshot schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2/ns1 + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'rbd2 *ns1 *test1 *every 1m' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + rbd mirror snapshot schedule status + test "$(rbd mirror snapshot schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + test "$(rbd mirror snapshot schedule status -p rbd2 --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + test "$(rbd mirror snapshot schedule status -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + test "$(rbd mirror snapshot schedule status -p rbd2/ns1 --image test1 --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + + rbd mirror image demote rbd2/ns1/test1 + for i in `seq 12`; do + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' || break + sleep 10 + done + rbd mirror snapshot schedule status | expect_fail grep 'rbd2/ns1/test1' + + rbd mirror image promote rbd2/ns1/test1 + for i in `seq 12`; do + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' && break + sleep 10 + done + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' + + rbd mirror snapshot schedule add 1h 00:15 + test "$(rbd mirror snapshot schedule ls)" = 'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -R | grep 'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2 + rbd mirror snapshot schedule ls -p rbd2 -R | grep 
'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2/ns1 + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'rbd2 *ns1 *test1 *every 1m' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + # Negative tests + expect_fail rbd mirror snapshot schedule add dummy + expect_fail rbd mirror snapshot schedule add -p rbd2/ns1 --image test1 dummy + expect_fail rbd mirror snapshot schedule remove dummy + expect_fail rbd mirror snapshot schedule remove -p rbd2/ns1 --image test1 dummy + test "$(rbd mirror snapshot schedule ls)" = 'every 1h starting at 00:15:00' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + rbd rm rbd2/ns1/test1 + for i in `seq 12`; do + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' || break + sleep 10 + done + rbd mirror snapshot schedule status | expect_fail grep 'rbd2/ns1/test1' + + rbd mirror snapshot schedule remove + test "$(rbd mirror snapshot schedule ls -R --format json)" = "[]" + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_mirror_snapshot_schedule_recovery() { + echo "testing recovery of mirror snapshot scheduler after module's RADOS client is blocklisted..." + remove_images + ceph osd pool create rbd3 8 + rbd pool init rbd3 + rbd namespace create rbd3/ns1 + + rbd mirror pool enable rbd3 image + rbd mirror pool enable rbd3/ns1 image + rbd mirror pool peer add rbd3 cluster1 + + rbd create $RBD_CREATE_ARGS -s 1 rbd3/ns1/test1 + rbd mirror image enable rbd3/ns1/test1 snapshot + test "$(rbd mirror image status rbd3/ns1/test1 | + grep -c mirror.primary)" = '1' + + rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 1m + test "$(rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1)" = 'every 1m' + + # Fetch and blocklist rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + # Check that you can add a mirror snapshot schedule after a few retries + expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m + sleep 10 + for i in `seq 24`; do + rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m && break + sleep 10 + done + + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 2m' + # Verify that the schedule present before client blocklisting is preserved + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 1m' + + rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 2m + rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 1m + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 2m' + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 1m' + + rbd snap purge rbd3/ns1/test1 + rbd rm rbd3/ns1/test1 + ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it +} + +test_perf_image_iostat() { + echo "testing perf image iostat..." 
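+ # Background for the checks below: 'rbd perf image iostat' reports per-image
+ # I/O statistics that the mgr gathers from the OSDs, and the pool/namespace
+ # to report on can be given either as a spec argument or via
+ # --pool/--namespace; omitting both reports across all pools. The jq
+ # expressions below only extract the image names from the JSON output.
+ # Illustrative sketch with a hypothetical pool name, not part of the test flow:
+ #   rbd perf image iostat --format json mypool | jq -r 'map(.image) | join(" ")'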
+ remove_images + + ceph osd pool create rbd1 8 + rbd pool init rbd1 + rbd namespace create rbd1/ns + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd namespace create rbd2/ns + + IMAGE_SPECS=("test1" "rbd1/test2" "rbd1/ns/test3" "rbd2/test4" "rbd2/ns/test5") + for spec in "${IMAGE_SPECS[@]}"; do + # ensure all images are created without a separate data pool + # as we filter iostat by specific pool specs below + rbd create $RBD_CREATE_ARGS --size 10G --rbd-default-data-pool '' $spec + done + + BENCH_PIDS=() + for spec in "${IMAGE_SPECS[@]}"; do + rbd bench --io-type write --io-pattern rand --io-total 10G --io-threads 1 \ + --rbd-cache false $spec >/dev/null 2>&1 & + BENCH_PIDS+=($!) + done + + # test specifying pool spec via spec syntax + test "$(rbd perf image iostat --format json rbd1 | + jq -r 'map(.image) | sort | join(" ")')" = 'test2' + test "$(rbd perf image iostat --format json rbd1/ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test3' + test "$(rbd perf image iostat --format json --rbd-default-pool rbd1 /ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test3' + + # test specifying pool spec via options + test "$(rbd perf image iostat --format json --pool rbd2 | + jq -r 'map(.image) | sort | join(" ")')" = 'test4' + test "$(rbd perf image iostat --format json --pool rbd2 --namespace ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test5' + test "$(rbd perf image iostat --format json --rbd-default-pool rbd2 --namespace ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test5' + + # test omitting pool spec (-> GLOBAL_POOL_KEY) + test "$(rbd perf image iostat --format json | + jq -r 'map(.image) | sort | join(" ")')" = 'test1 test2 test3 test4 test5' + + for pid in "${BENCH_PIDS[@]}"; do + kill $pid + done + wait + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it + ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it +} + +test_perf_image_iostat_recovery() { + echo "testing recovery of perf handler after module's RADOS client is blocklisted..." + remove_images + + ceph osd pool create rbd3 8 + rbd pool init rbd3 + rbd namespace create rbd3/ns + + IMAGE_SPECS=("rbd3/test1" "rbd3/ns/test2") + for spec in "${IMAGE_SPECS[@]}"; do + # ensure all images are created without a separate data pool + # as we filter iostat by specific pool specs below + rbd create $RBD_CREATE_ARGS --size 10G --rbd-default-data-pool '' $spec + done + + BENCH_PIDS=() + for spec in "${IMAGE_SPECS[@]}"; do + rbd bench --io-type write --io-pattern rand --io-total 10G --io-threads 1 \ + --rbd-cache false $spec >/dev/null 2>&1 & + BENCH_PIDS+=($!) + done + + test "$(rbd perf image iostat --format json rbd3 | + jq -r 'map(.image) | sort | join(" ")')" = 'test1' + + # Fetch and blocklist the rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + expect_fail rbd perf image iostat --format json rbd3/ns + sleep 10 + for i in `seq 24`; do + test "$(rbd perf image iostat --format json rbd3/ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test2' && break + sleep 10 + done + + for pid in "${BENCH_PIDS[@]}"; do + kill $pid + done + wait + + remove_images + ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it +} + +test_mirror_pool_peer_bootstrap_create() { + echo "testing mirror pool peer bootstrap create..." 
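+ # Background for the checks below: the bootstrap token is base64-encoded
+ # JSON carrying the local cluster's fsid, a generated client id and key, and
+ # the monitor addresses, which is why it is decoded with 'base64 -d' and
+ # picked apart with jq; the same cluster-wide token is expected for every pool.
+ # Illustrative sketch, not part of the test flow:
+ #   rbd mirror pool peer bootstrap create rbd1 | base64 -d | jq .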
+ remove_images + + ceph osd pool create rbd1 8 + rbd pool init rbd1 + rbd mirror pool enable rbd1 image + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd mirror pool enable rbd2 pool + + readarray -t MON_ADDRS < <(ceph mon dump | + sed -n 's/^[0-9]: \(.*\) mon\.[a-z]$/\1/p') + + # check that all monitors make it to the token even if only one + # valid monitor is specified + BAD_MON_ADDR="1.2.3.4:6789" + MON_HOST="${MON_ADDRS[0]},$BAD_MON_ADDR" + TOKEN="$(rbd mirror pool peer bootstrap create \ + --mon-host "$MON_HOST" rbd1 | base64 -d)" + TOKEN_FSID="$(jq -r '.fsid' <<< "$TOKEN")" + TOKEN_CLIENT_ID="$(jq -r '.client_id' <<< "$TOKEN")" + TOKEN_KEY="$(jq -r '.key' <<< "$TOKEN")" + TOKEN_MON_HOST="$(jq -r '.mon_host' <<< "$TOKEN")" + + test "$TOKEN_FSID" = "$(ceph fsid)" + test "$TOKEN_KEY" = "$(ceph auth get-key client.$TOKEN_CLIENT_ID)" + for addr in "${MON_ADDRS[@]}"; do + fgrep "$addr" <<< "$TOKEN_MON_HOST" + done + expect_fail fgrep "$BAD_MON_ADDR" <<< "$TOKEN_MON_HOST" + + # check that the token does not change, including across pools + test "$(rbd mirror pool peer bootstrap create \ + --mon-host "$MON_HOST" rbd1 | base64 -d)" = "$TOKEN" + test "$(rbd mirror pool peer bootstrap create \ + rbd1 | base64 -d)" = "$TOKEN" + test "$(rbd mirror pool peer bootstrap create \ + --mon-host "$MON_HOST" rbd2 | base64 -d)" = "$TOKEN" + test "$(rbd mirror pool peer bootstrap create \ + rbd2 | base64 -d)" = "$TOKEN" + + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it + ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it +} + +test_tasks_removed_pool() { + echo "testing removing pool under running tasks..." + remove_images + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + + rbd create $RBD_CREATE_ARGS --size 1G foo + rbd snap create foo@snap + rbd snap protect foo@snap + rbd clone foo@snap bar + + rbd create $RBD_CREATE_ARGS --size 1G rbd2/dummy + rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/dummy + rbd snap create rbd2/dummy@snap + rbd snap protect rbd2/dummy@snap + for i in {1..5}; do + rbd clone rbd2/dummy@snap rbd2/dummy$i + done + + # queue flattens on a few dummy images and remove that pool + test "$(ceph rbd task list)" = "[]" + for i in {1..5}; do + ceph rbd task add flatten rbd2/dummy$i + done + ceph osd pool delete rbd2 rbd2 --yes-i-really-really-mean-it + test "$(ceph rbd task list)" != "[]" + + # queue flatten on another image and check that it completes + rbd info bar | grep 'parent: ' + expect_fail rbd snap unprotect foo@snap + ceph rbd task add flatten bar + for i in {1..12}; do + rbd info bar | grep 'parent: ' || break + sleep 10 + done + rbd info bar | expect_fail grep 'parent: ' + rbd snap unprotect foo@snap + + # check that flattens disrupted by pool removal are cleaned up + for i in {1..12}; do + test "$(ceph rbd task list)" = "[]" && break + sleep 10 + done + test "$(ceph rbd task list)" = "[]" + + remove_images +} + +test_tasks_recovery() { + echo "testing task handler recovery after module's RADOS client is blocklisted..." 
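+ # Background for the checks below: blocklisting the rbd_support module's
+ # RADOS client simulates the module losing its cluster connection; the
+ # module is expected to notice, recreate its client and recover, so the
+ # first request after blocklisting may fail and the test retries until one
+ # succeeds. Illustrative sketch of how the client address is located, not
+ # part of the test flow:
+ #   ceph mgr dump | jq '.active_clients[] | select(.name == "rbd_support")'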
+ remove_images + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + + rbd create $RBD_CREATE_ARGS --size 1G rbd2/img1 + rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/img1 + rbd snap create rbd2/img1@snap + rbd snap protect rbd2/img1@snap + rbd clone rbd2/img1@snap rbd2/clone1 + + # Fetch and blocklist rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + expect_fail ceph rbd task add flatten rbd2/clone1 + sleep 10 + for i in `seq 24`; do + ceph rbd task add flatten rbd2/clone1 && break + sleep 10 + done + test "$(ceph rbd task list)" != "[]" + + for i in {1..12}; do + rbd info rbd2/clone1 | grep 'parent: ' || break + sleep 10 + done + rbd info rbd2/clone1 | expect_fail grep 'parent: ' + rbd snap unprotect rbd2/img1@snap + + test "$(ceph rbd task list)" = "[]" + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_pool_image_args +test_rename +test_ls +test_remove +test_migration +test_config +RBD_CREATE_ARGS="" +test_others +test_locking +test_thick_provision +RBD_CREATE_ARGS="--image-format 2" +test_others +test_locking +test_clone +test_trash +test_purge +test_deep_copy_clone +test_clone_v2 +test_thick_provision +test_namespace +test_trash_purge_schedule +test_trash_purge_schedule_recovery +test_mirror_snapshot_schedule +test_mirror_snapshot_schedule_recovery +test_perf_image_iostat +test_perf_image_iostat_recovery +test_mirror_pool_peer_bootstrap_create +test_tasks_removed_pool +test_tasks_recovery + +echo OK diff --git a/qa/workunits/rbd/cli_migration.sh b/qa/workunits/rbd/cli_migration.sh new file mode 100755 index 000000000..be8e031fd --- /dev/null +++ b/qa/workunits/rbd/cli_migration.sh @@ -0,0 +1,357 @@ +#!/usr/bin/env bash +set -ex + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +TEMPDIR= +IMAGE1=image1 +IMAGE2=image2 +IMAGE3=image3 +IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3}" + +cleanup() { + cleanup_tempdir + remove_images +} + +setup_tempdir() { + TEMPDIR=`mktemp -d` +} + +cleanup_tempdir() { + rm -rf ${TEMPDIR} +} + +create_base_image() { + local image=$1 + + rbd create --size 1G ${image} + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 256M ${image} + rbd snap create ${image}@1 + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 64M ${image} + rbd snap create ${image}@2 + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 128M ${image} +} + +export_raw_image() { + local image=$1 + + rm -rf "${TEMPDIR}/${image}" + rbd export ${image} "${TEMPDIR}/${image}" +} + +export_base_image() { + local image=$1 + + export_raw_image "${image}" + export_raw_image "${image}@1" + export_raw_image "${image}@2" +} + +remove_image() { + local image=$1 + + (rbd migration abort $image || true) >/dev/null 2>&1 + (rbd snap purge $image || true) >/dev/null 2>&1 + (rbd rm $image || true) >/dev/null 2>&1 +} + +remove_images() { + for image in ${IMAGES} + do + remove_image ${image} + done +} + +show_diff() +{ + local file1=$1 + local file2=$2 + + xxd "${file1}" > "${file1}.xxd" + xxd "${file2}" > "${file2}.xxd" + sdiff -s "${file1}.xxd" "${file2}.xxd" | head -n 64 + rm -f "${file1}.xxd" "${file2}.xxd" +} + +compare_images() { + local src_image=$1 + local dst_image=$2 + local ret=0 + + export_raw_image ${dst_image} + if ! 
cmp "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" + then + show_diff "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" + ret=1 + fi + return ${ret} +} + +test_import_native_format() { + local base_image=$1 + local dest_image=$2 + + rbd migration prepare --import-only "rbd/${base_image}@2" ${dest_image} + rbd migration abort ${dest_image} + + local pool_id=$(ceph osd pool ls detail --format xml | xmlstarlet sel -t -v "//pools/pool[pool_name='rbd']/pool_id") + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "native", + "pool_id": ${pool_id}, + "pool_namespace": "", + "image_name": "${base_image}", + "snap_name": "2" +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + rbd migration execute ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec "{\"type\": \"native\", \"pool_id\": "${pool_id}", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ + ${dest_image} + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec "{\"type\": \"native\", \"pool_name\": \"rbd\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ + ${dest_image} + rbd migration execute ${dest_image} + rbd migration commit ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + + remove_image "${dest_image}" +} + +test_import_qcow_format() { + local base_image=$1 + local dest_image=$2 + + if ! qemu-img convert -f raw -O qcow rbd:rbd/${base_image} ${TEMPDIR}/${base_image}.qcow; then + echo "skipping QCOW test" + return 0 + fi + qemu-img info -f qcow ${TEMPDIR}/${base_image}.qcow + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "qcow", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}.qcow" + } +} +EOF + cat ${TEMPDIR}/spec.json + + set +e + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + local error_code=$? 
+ set -e + + if [ $error_code -eq 95 ]; then + echo "skipping QCOW test (librbd support disabled)" + return 0 + fi + test $error_code -eq 0 + + compare_images "${base_image}" "${dest_image}" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}" "${dest_image}" + + rbd migration execute ${dest_image} + + compare_images "${base_image}" "${dest_image}" + + rbd migration commit ${dest_image} + + compare_images "${base_image}" "${dest_image}" + + remove_image "${dest_image}" +} + +test_import_qcow2_format() { + local base_image=$1 + local dest_image=$2 + + # create new image via qemu-img and its bench tool since we cannot + # import snapshot deltas into QCOW2 + qemu-img create -f qcow2 ${TEMPDIR}/${base_image}.qcow2 1G + + qemu-img bench -f qcow2 -w -c 65536 -d 16 --pattern 65 -s 4096 \ + -S $((($RANDOM % 262144) * 4096)) ${TEMPDIR}/${base_image}.qcow2 + qemu-img convert -f qcow2 -O raw ${TEMPDIR}/${base_image}.qcow2 \ + "${TEMPDIR}/${base_image}@snap1" + qemu-img snapshot -c "snap1" ${TEMPDIR}/${base_image}.qcow2 + + qemu-img bench -f qcow2 -w -c 16384 -d 16 --pattern 66 -s 4096 \ + -S $((($RANDOM % 262144) * 4096)) ${TEMPDIR}/${base_image}.qcow2 + qemu-img convert -f qcow2 -O raw ${TEMPDIR}/${base_image}.qcow2 \ + "${TEMPDIR}/${base_image}@snap2" + qemu-img snapshot -c "snap2" ${TEMPDIR}/${base_image}.qcow2 + + qemu-img bench -f qcow2 -w -c 32768 -d 16 --pattern 67 -s 4096 \ + -S $((($RANDOM % 262144) * 4096)) ${TEMPDIR}/${base_image}.qcow2 + qemu-img convert -f qcow2 -O raw ${TEMPDIR}/${base_image}.qcow2 \ + ${TEMPDIR}/${base_image} + + qemu-img info -f qcow2 ${TEMPDIR}/${base_image}.qcow2 + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "qcow", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}.qcow2" + } +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + rbd migration execute ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + rbd migration commit ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + remove_image "${dest_image}" +} + +test_import_raw_format() { + local base_image=$1 + local dest_image=$2 + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "raw", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}" + } +} +EOF + cat ${TEMPDIR}/spec.json + + cat ${TEMPDIR}/spec.json | rbd migration prepare --import-only \ + --source-spec-path - ${dest_image} + compare_images ${base_image} ${dest_image} + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + rbd migration execute ${dest_image} + rbd migration commit ${dest_image} 
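+ # After 'commit' the source stream is detached and the destination is a
+ # self-contained image, so the comparison below runs against the final image.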
+ + compare_images ${base_image} ${dest_image} + + remove_image "${dest_image}" + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "raw", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}" + }, + "snapshots": [{ + "type": "raw", + "name": "snap1", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}@1" + } + }, { + "type": "raw", + "name": "snap2", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}@2" + } + }] +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + rbd snap create ${dest_image}@head + rbd bench --io-type write --io-pattern rand --io-size=32K --io-total=32M ${dest_image} + + compare_images "${base_image}" "${dest_image}@head" + compare_images "${base_image}@1" "${dest_image}@snap1" + compare_images "${base_image}@2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}@head" + + rbd migration execute ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@snap1" + compare_images "${base_image}@2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}@head" + + rbd migration commit ${dest_image} + + remove_image "${dest_image}" +} + +# make sure rbd pool is EMPTY.. this is a test script!! +rbd ls 2>&1 | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." && exit 1 + +setup_tempdir +trap 'cleanup $?' INT TERM EXIT + +create_base_image ${IMAGE1} +export_base_image ${IMAGE1} + +test_import_native_format ${IMAGE1} ${IMAGE2} +test_import_qcow_format ${IMAGE1} ${IMAGE2} +test_import_qcow2_format ${IMAGE2} ${IMAGE3} +test_import_raw_format ${IMAGE1} ${IMAGE2} + +echo OK diff --git a/qa/workunits/rbd/concurrent.sh b/qa/workunits/rbd/concurrent.sh new file mode 100755 index 000000000..abaad75f5 --- /dev/null +++ b/qa/workunits/rbd/concurrent.sh @@ -0,0 +1,375 @@ +#!/usr/bin/env bash + +# Copyright (C) 2013 Inktank Storage, Inc. +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. + +# Alex Elder <elder@inktank.com> +# January 29, 2013 + +################################################################ + +# The purpose of this test is to exercise paths through the rbd +# code, making sure no bad pointer references or invalid reference +# count operations occur in the face of concurrent activity. +# +# Each pass of the test creates an rbd image, maps it, and writes +# some data into the image. It also reads some data from all of the +# other images that exist at the time the pass executes. Finally, +# the image is unmapped and removed. The image removal completes in +# the background. +# +# An iteration of the test consists of performing some number of +# passes, initating each pass as a background job, and finally +# sleeping for a variable delay. The delay is initially a specified +# value, but each iteration shortens that proportionally, such that +# the last iteration will not delay at all. +# +# The result exercises concurrent creates and deletes of rbd images, +# writes to new images, reads from both written and unwritten image +# data (including reads concurrent with writes), and attempts to +# unmap images being read. 
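+
+# A typical invocation (illustrative values only) might be:
+#     concurrent.sh -i 10 -c 3 -d 5
+# i.e. 10 iterations, 3 images created per iteration, and an initial 5 second
+# delay that shrinks to zero by the last iteration; the defaults below apply
+# otherwise.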
+ +# Usage: concurrent [-i <iter>] [-c <count>] [-d <delay>] +# +# Exit status: +# 0: success +# 1: usage error +# 2: other runtime error +# 99: argument count error (programming error) +# 100: getopt error (internal error) + +################################################################ + +set -ex + +# Default flag values; RBD_CONCURRENT_ITER names are intended +# to be used in yaml scripts to pass in alternate values, e.g.: +# env: +# RBD_CONCURRENT_ITER: 20 +# RBD_CONCURRENT_COUNT: 5 +# RBD_CONCURRENT_DELAY: 3 +ITER_DEFAULT=${RBD_CONCURRENT_ITER:-100} +COUNT_DEFAULT=${RBD_CONCURRENT_COUNT:-5} +DELAY_DEFAULT=${RBD_CONCURRENT_DELAY:-5} # seconds + +CEPH_SECRET_FILE=${CEPH_SECRET_FILE:-} +CEPH_ID=${CEPH_ID:-admin} +SECRET_ARGS="" +if [ "${CEPH_SECRET_FILE}" ]; then + SECRET_ARGS="--secret $CEPH_SECRET_FILE" +fi + +################################################################ + +function setup() { + ID_MAX_DIR=$(mktemp -d /tmp/image_max_id.XXXXX) + ID_COUNT_DIR=$(mktemp -d /tmp/image_ids.XXXXXX) + NAMES_DIR=$(mktemp -d /tmp/image_names.XXXXXX) + SOURCE_DATA=$(mktemp /tmp/source_data.XXXXXX) + + # Use urandom to generate SOURCE_DATA + dd if=/dev/urandom of=${SOURCE_DATA} bs=2048 count=66 \ + >/dev/null 2>&1 + + # List of rbd id's *not* created by this script + export INITIAL_RBD_IDS=$(ls /sys/bus/rbd/devices) + + # Set up some environment for normal teuthology test setup. + # This really should not be necessary but I found it was. + + export CEPH_ARGS=" --name client.0" +} + +function cleanup() { + [ ! "${ID_MAX_DIR}" ] && return + local id + local image + + # Unmap mapped devices + for id in $(rbd_ids); do + image=$(cat "/sys/bus/rbd/devices/${id}/name") + rbd_unmap_image "${id}" + rbd_destroy_image "${image}" + done + # Get any leftover images + for image in $(rbd ls 2>/dev/null); do + rbd_destroy_image "${image}" + done + wait + sync + rm -f "${SOURCE_DATA}" + [ -d "${NAMES_DIR}" ] && rmdir "${NAMES_DIR}" + echo "Max concurrent rbd image count was $(get_max "${ID_COUNT_DIR}")" + rm -rf "${ID_COUNT_DIR}" + echo "Max rbd image id was $(get_max "${ID_MAX_DIR}")" + rm -rf "${ID_MAX_DIR}" +} + +function get_max() { + [ $# -eq 1 ] || exit 99 + local dir="$1" + + ls -U "${dir}" | sort -n | tail -1 +} + +trap cleanup HUP INT QUIT + +# print a usage message and quit +# +# if a message is supplied, print that first, and then exit +# with non-zero status +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "$@" >&2 + fi + + echo "" >&2 + echo "Usage: ${PROGNAME} <options> <tests>" >&2 + echo "" >&2 + echo " options:" >&2 + echo " -h or --help" >&2 + echo " show this message" >&2 + echo " -i or --iterations" >&2 + echo " iteration count (1 or more)" >&2 + echo " -c or --count" >&2 + echo " images created per iteration (1 or more)" >&2 + echo " -d or --delay" >&2 + echo " maximum delay between iterations" >&2 + echo "" >&2 + echo " defaults:" >&2 + echo " iterations: ${ITER_DEFAULT}" + echo " count: ${COUNT_DEFAULT}" + echo " delay: ${DELAY_DEFAULT} (seconds)" + echo "" >&2 + + [ $# -gt 0 ] && exit 1 + + exit 0 # This is used for a --help +} + +# parse command line arguments +function parseargs() { + ITER="${ITER_DEFAULT}" + COUNT="${COUNT_DEFAULT}" + DELAY="${DELAY_DEFAULT}" + + # Short option flags + SHORT_OPTS="" + SHORT_OPTS="${SHORT_OPTS},h" + SHORT_OPTS="${SHORT_OPTS},i:" + SHORT_OPTS="${SHORT_OPTS},c:" + SHORT_OPTS="${SHORT_OPTS},d:" + + # Short option flags + LONG_OPTS="" + LONG_OPTS="${LONG_OPTS},help" + LONG_OPTS="${LONG_OPTS},iterations:" + LONG_OPTS="${LONG_OPTS},count:" 
+ LONG_OPTS="${LONG_OPTS},delay:" + + TEMP=$(getopt --name "${PROGNAME}" \ + --options "${SHORT_OPTS}" \ + --longoptions "${LONG_OPTS}" \ + -- "$@") + eval set -- "$TEMP" + + while [ "$1" != "--" ]; do + case "$1" in + -h|--help) + usage + ;; + -i|--iterations) + ITER="$2" + [ "${ITER}" -lt 1 ] && + usage "bad iterations value" + shift + ;; + -c|--count) + COUNT="$2" + [ "${COUNT}" -lt 1 ] && + usage "bad count value" + shift + ;; + -d|--delay) + DELAY="$2" + shift + ;; + *) + exit 100 # Internal error + ;; + esac + shift + done + shift +} + +function rbd_ids() { + [ $# -eq 0 ] || exit 99 + local ids + local i + + [ -d /sys/bus/rbd ] || return + ids=" $(echo $(ls /sys/bus/rbd/devices)) " + for i in ${INITIAL_RBD_IDS}; do + ids=${ids/ ${i} / } + done + echo ${ids} +} + +function update_maxes() { + local ids="$@" + local last_id + # These aren't 100% safe against concurrent updates but it + # should be pretty close + count=$(echo ${ids} | wc -w) + touch "${ID_COUNT_DIR}/${count}" + last_id=${ids% } + last_id=${last_id##* } + touch "${ID_MAX_DIR}/${last_id}" +} + +function rbd_create_image() { + [ $# -eq 0 ] || exit 99 + local image=$(basename $(mktemp "${NAMES_DIR}/image.XXXXXX")) + + rbd create "${image}" --size=1024 + echo "${image}" +} + +function rbd_image_id() { + [ $# -eq 1 ] || exit 99 + local image="$1" + + grep -l "${image}" /sys/bus/rbd/devices/*/name 2>/dev/null | + cut -d / -f 6 +} + +function rbd_map_image() { + [ $# -eq 1 ] || exit 99 + local image="$1" + local id + + sudo rbd map "${image}" --user "${CEPH_ID}" ${SECRET_ARGS} \ + > /dev/null 2>&1 + + id=$(rbd_image_id "${image}") + echo "${id}" +} + +function rbd_write_image() { + [ $# -eq 1 ] || exit 99 + local id="$1" + + # Offset and size here are meant to ensure beginning and end + # cross both (4K or 64K) page and (4MB) rbd object boundaries. + # It assumes the SOURCE_DATA file has size 66 * 2048 bytes + dd if="${SOURCE_DATA}" of="/dev/rbd${id}" bs=2048 seek=2015 \ + > /dev/null 2>&1 +} + +# All starting and ending offsets here are selected so they are not +# aligned on a (4 KB or 64 KB) page boundary +function rbd_read_image() { + [ $# -eq 1 ] || exit 99 + local id="$1" + + # First read starting and ending at an offset before any + # written data. The osd zero-fills data read from an + # existing rbd object, but before any previously-written + # data. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=3 \ + > /dev/null 2>&1 + # Next read starting at an offset before any written data, + # but ending at an offset that includes data that's been + # written. The osd zero-fills unwritten data at the + # beginning of a read. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=1983 \ + > /dev/null 2>&1 + # Read the data at offset 2015 * 2048 bytes (where it was + # written) and make sure it matches the original data. + cmp --quiet "${SOURCE_DATA}" "/dev/rbd${id}" 0 4126720 || + echo "MISMATCH!!!" + # Now read starting within the pre-written data, but ending + # beyond it. The rbd client zero-fills the unwritten + # portion at the end of a read. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=2079 \ + > /dev/null 2>&1 + # Now read starting from an unwritten range within a written + # rbd object. The rbd client zero-fills this. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=2115 \ + > /dev/null 2>&1 + # Finally read from an unwritten region which would reside + # in a different (non-existent) osd object. The osd client + # zero-fills unwritten data when the target object doesn't + # exist. 
+ dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=4098 \ + > /dev/null 2>&1 +} + +function rbd_unmap_image() { + [ $# -eq 1 ] || exit 99 + local id="$1" + + sudo rbd unmap "/dev/rbd${id}" +} + +function rbd_destroy_image() { + [ $# -eq 1 ] || exit 99 + local image="$1" + + # Don't wait for it to complete, to increase concurrency + rbd rm "${image}" >/dev/null 2>&1 & + rm -f "${NAMES_DIR}/${image}" +} + +function one_pass() { + [ $# -eq 0 ] || exit 99 + local image + local id + local ids + local i + + image=$(rbd_create_image) + id=$(rbd_map_image "${image}") + ids=$(rbd_ids) + update_maxes "${ids}" + for i in ${rbd_ids}; do + if [ "${i}" -eq "${id}" ]; then + rbd_write_image "${i}" + else + rbd_read_image "${i}" + fi + done + rbd_unmap_image "${id}" + rbd_destroy_image "${image}" +} + +################################################################ + +parseargs "$@" + +setup + +for iter in $(seq 1 "${ITER}"); do + for count in $(seq 1 "${COUNT}"); do + one_pass & + done + # Sleep longer at first, overlap iterations more later. + # Use awk to get sub-second granularity (see sleep(1)). + sleep $(echo "${DELAY}" "${iter}" "${ITER}" | + awk '{ printf("%.2f\n", $1 - $1 * $2 / $3);}') + +done +wait + +cleanup + +exit 0 diff --git a/qa/workunits/rbd/crimson/test_crimson_librbd.sh b/qa/workunits/rbd/crimson/test_crimson_librbd.sh new file mode 100755 index 000000000..fb308de41 --- /dev/null +++ b/qa/workunits/rbd/crimson/test_crimson_librbd.sh @@ -0,0 +1,35 @@ +#!/bin/sh -e + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --error-exitcode=1 ceph_test_librbd +else + # Run test cases indivually to allow better selection + # of ongoing Crimson development. + # Disabled test groups are tracked here: + # https://tracker.ceph.com/issues/58791 + ceph_test_librbd --gtest_filter='TestLibRBD.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/0.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/1.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/2.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/3.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/4.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/5.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/6.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/7.*' + # ceph_test_librbd --gtest_filter='DiffIterateTest/0.*' + # ceph_test_librbd --gtest_filter='DiffIterateTest/1.*' + ceph_test_librbd --gtest_filter='TestImageWatcher.*' + ceph_test_librbd --gtest_filter='TestInternal.*' + ceph_test_librbd --gtest_filter='TestMirroring.*' + # ceph_test_librbd --gtest_filter='TestDeepCopy.*' + ceph_test_librbd --gtest_filter='TestGroup.*' + # ceph_test_librbd --gtest_filter='TestMigration.*' + ceph_test_librbd --gtest_filter='TestMirroringWatcher.*' + ceph_test_librbd --gtest_filter='TestObjectMap.*' + ceph_test_librbd --gtest_filter='TestOperations.*' + ceph_test_librbd --gtest_filter='TestTrash.*' + ceph_test_librbd --gtest_filter='TestJournalEntries.*' + ceph_test_librbd --gtest_filter='TestJournalReplay.*' +fi +exit 0 diff --git a/qa/workunits/rbd/diff.sh b/qa/workunits/rbd/diff.sh new file mode 100755 index 000000000..fbd6e0642 --- /dev/null +++ b/qa/workunits/rbd/diff.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +set -ex + +function cleanup() { + rbd snap purge foo || : + rbd rm foo || : + rbd snap purge foo.copy || : + rbd rm foo.copy || : + rbd snap purge foo.copy2 || : + rbd rm foo.copy2 || : + rm -f foo.diff foo.out +} + +cleanup + +rbd 
create foo --size 1000 +rbd bench --io-type write foo --io-size 4096 --io-threads 5 --io-total 4096000 --io-pattern rand + +#rbd cp foo foo.copy +rbd create foo.copy --size 1000 +rbd export-diff foo - | rbd import-diff - foo.copy + +rbd snap create foo --snap=two +rbd bench --io-type write foo --io-size 4096 --io-threads 5 --io-total 4096000 --io-pattern rand +rbd snap create foo --snap=three +rbd snap create foo.copy --snap=two + +rbd export-diff foo@two --from-snap three foo.diff && exit 1 || true # wrong snap order +rm -f foo.diff + +rbd export-diff foo@three --from-snap two foo.diff +rbd import-diff foo.diff foo.copy +rbd import-diff foo.diff foo.copy && exit 1 || true # this should fail with EEXIST on the end snap +rbd snap ls foo.copy | grep three + +rbd create foo.copy2 --size 1000 +rbd import-diff foo.diff foo.copy2 && exit 1 || true # this should fail bc the start snap dne + +rbd export foo foo.out +orig=`md5sum foo.out | awk '{print $1}'` +rm foo.out +rbd export foo.copy foo.out +copy=`md5sum foo.out | awk '{print $1}'` + +if [ "$orig" != "$copy" ]; then + echo does not match + exit 1 +fi + +cleanup + +echo OK + diff --git a/qa/workunits/rbd/diff_continuous.sh b/qa/workunits/rbd/diff_continuous.sh new file mode 100755 index 000000000..fd1785e07 --- /dev/null +++ b/qa/workunits/rbd/diff_continuous.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +set -ex +set -o pipefail + +function untar_workload() { + local i + for ((i = 0; i < 10; i++)); do + pv -L 10M linux-5.4.tar.gz > "${MOUNT}/linux-5.4.tar.gz" + tar -C "${MOUNT}" -xzf "${MOUNT}/linux-5.4.tar.gz" + sync "${MOUNT}" + rm -rf "${MOUNT}"/linux-5.4* + done +} + +function check_object_map() { + local spec="$1" + + rbd object-map check "${spec}" + + local flags + flags="$(rbd info "${spec}" | grep 'flags: ')" + if [[ "${flags}" =~ object\ map\ invalid ]]; then + echo "Object map invalid at ${spec}" + exit 1 + fi + if [[ "${flags}" =~ fast\ diff\ invalid ]]; then + echo "Fast diff invalid at ${spec}" + exit 1 + fi +} + +# RBD_DEVICE_TYPE is intended to be set from yaml, default to krbd +readonly DEVICE_TYPE="${RBD_DEVICE_TYPE:-krbd}" + +BASE_UUID="$(uuidgen)" +readonly BASE_UUID + +readonly SIZE="2G" +readonly SRC="${BASE_UUID}-src" +readonly DST="${BASE_UUID}-dst" +readonly MOUNT="${BASE_UUID}-mnt" + +rbd create -s "${SIZE}" --stripe-unit 64K --stripe-count 8 \ + --image-feature exclusive-lock,object-map,fast-diff "${SRC}" +rbd create -s "${SIZE}" --object-size 512K "${DST}" + +dev="$(sudo rbd device map -t "${DEVICE_TYPE}" "${SRC}")" +sudo mkfs.ext4 "${dev}" +mkdir "${MOUNT}" +sudo mount "${dev}" "${MOUNT}" +sudo chown "$(whoami)" "${MOUNT}" + +# start untar in the background +wget https://download.ceph.com/qa/linux-5.4.tar.gz +untar_workload & +untar_pid=$! + +# export initial incremental +snap_num=1 +rbd snap create "${SRC}@snap${snap_num}" +rbd export-diff "${SRC}@snap${snap_num}" "${BASE_UUID}@snap${snap_num}.diff" + +# keep exporting successive incrementals while untar is running +while kill -0 "${untar_pid}"; do + snap_num=$((snap_num + 1)) + rbd snap create "${SRC}@snap${snap_num}" + sleep $((RANDOM % 4 + 1)) + rbd export-diff --whole-object --from-snap "snap$((snap_num - 1))" \ + "${SRC}@snap${snap_num}" "${BASE_UUID}@snap${snap_num}.diff" +done + +sudo umount "${MOUNT}" +sudo rbd device unmap -t "${DEVICE_TYPE}" "${dev}" + +if ! 
wait "${untar_pid}"; then + echo "untar_workload failed" + exit 1 +fi + +echo "Exported ${snap_num} incrementals" +if ((snap_num < 30)); then + echo "Too few incrementals" + exit 1 +fi + +# validate +for ((i = 1; i <= snap_num; i++)); do + rbd import-diff "${BASE_UUID}@snap${i}.diff" "${DST}" + src_sum="$(rbd export "${SRC}@snap${i}" - | md5sum | awk '{print $1}')" + dst_sum="$(rbd export "${DST}@snap${i}" - | md5sum | awk '{print $1}')" + if [[ "${src_sum}" != "${dst_sum}" ]]; then + echo "Mismatch at snap${i}: ${src_sum} != ${dst_sum}" + exit 1 + fi + check_object_map "${SRC}@snap${i}" + # FIXME: this reproduces http://tracker.ceph.com/issues/37876 + # there is no fstrim involved but "rbd import-diff" can produce + # write-zeroes requests which turn into discards under the hood + # actual: EXISTS, expected: EXISTS_CLEAN inconsistency is harmless + # from a data integrity POV and data is validated above regardless, + # so just waive it for now + #check_object_map "${DST}@snap${i}" +done + +echo OK diff --git a/qa/workunits/rbd/huge-tickets.sh b/qa/workunits/rbd/huge-tickets.sh new file mode 100755 index 000000000..22853c07a --- /dev/null +++ b/qa/workunits/rbd/huge-tickets.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# This is a test for http://tracker.ceph.com/issues/8979 and the fallout +# from triaging it. #8979 itself was random crashes on corrupted memory +# due to a buffer overflow (for tickets larger than 256 bytes), further +# inspection showed that vmalloced tickets weren't handled correctly as +# well. +# +# What we are doing here is generating three huge keyrings and feeding +# them to libceph (through 'rbd map' on a scratch image). Bad kernels +# will crash reliably either on corrupted memory somewhere or a bad page +# fault in scatterwalk_pagedone(). + +set -ex + +function generate_keyring() { + local user=$1 + local n=$2 + + ceph-authtool -C -n client.$user --cap mon 'allow *' --gen-key /tmp/keyring-$user + + set +x # don't pollute trace with echos + echo -en "\tcaps osd = \"allow rwx pool=rbd" >>/tmp/keyring-$user + for i in $(seq 1 $n); do + echo -n ", allow rwx pool=pool$i" >>/tmp/keyring-$user + done + echo "\"" >>/tmp/keyring-$user + set -x +} + +generate_keyring foo 1000 # ~25K, kmalloc +generate_keyring bar 20000 # ~500K, vmalloc +generate_keyring baz 300000 # ~8M, vmalloc + sg chaining + +rbd create --size 1 test + +for user in {foo,bar,baz}; do + ceph auth import -i /tmp/keyring-$user + DEV=$(sudo rbd map -n client.$user --keyring /tmp/keyring-$user test) + sudo rbd unmap $DEV +done diff --git a/qa/workunits/rbd/image_read.sh b/qa/workunits/rbd/image_read.sh new file mode 100755 index 000000000..ddca8356e --- /dev/null +++ b/qa/workunits/rbd/image_read.sh @@ -0,0 +1,680 @@ +#!/usr/bin/env bash + +# Copyright (C) 2013 Inktank Storage, Inc. +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. + +# Alex Elder <elder@inktank.com> +# April 10, 2013 + +################################################################ + +# The purpose of this test is to validate that data read from a +# mapped rbd image is what it's expected to be. +# +# By default it creates an image and fills it with some data. 
It +# then reads back the data at a series of offsets known to cover +# various situations (such as reading the beginning, end, or the +# entirety of an object, or doing a read that spans multiple +# objects), and stashes the results in a set of local files. +# +# It also creates and maps a snapshot of the original image after +# it's been filled, and reads back the same ranges of data from the +# snapshot. It then compares the data read back with what was read +# back from the original image, verifying they match. +# +# Clone functionality is tested as well, in which case a clone is +# made of the snapshot, and the same ranges of data are again read +# and compared with the original. In addition, a snapshot of that +# clone is created, and a clone of *that* snapshot is put through +# the same set of tests. (Clone testing can be optionally skipped.) + +################################################################ + +# Default parameter values. Environment variables, if set, will +# supercede these defaults. Such variables have names that begin +# with "IMAGE_READ_", for e.g. use IMAGE_READ_PAGE_SIZE=65536 +# to use 65536 as the page size. +set -e + +DEFAULT_VERBOSE=true +DEFAULT_TEST_CLONES=true +DEFAULT_LOCAL_FILES=false +DEFAULT_FORMAT=2 +DEFAULT_DOUBLE_ORDER=true +DEFAULT_HALF_ORDER=false +DEFAULT_PAGE_SIZE=4096 +DEFAULT_OBJECT_ORDER=22 +MIN_OBJECT_ORDER=12 # technically 9, but the rbd CLI enforces 12 +MAX_OBJECT_ORDER=32 + +RBD_FORCE_ALLOW_V1=1 + +PROGNAME=$(basename $0) + +ORIGINAL=original-$$ +SNAP1=snap1-$$ +CLONE1=clone1-$$ +SNAP2=snap2-$$ +CLONE2=clone2-$$ + +function err() { + if [ $# -gt 0 ]; then + echo "${PROGNAME}: $@" >&2 + fi + exit 2 +} + +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "${PROGNAME}: $@" >&2 + fi + echo "" >&2 + echo "Usage: ${PROGNAME} [<options>]" >&2 + echo "" >&2 + echo "options are:" >&2 + echo " -o object_order" >&2 + echo " must be ${MIN_OBJECT_ORDER}..${MAX_OBJECT_ORDER}" >&2 + echo " -p page_size (in bytes)" >&2 + echo " note: there must be at least 4 pages per object" >&2 + echo " -1" >&2 + echo " test using format 1 rbd images (default)" >&2 + echo " -2" >&2 + echo " test using format 2 rbd images" >&2 + echo " -c" >&2 + echo " also test rbd clone images (implies format 2)" >&2 + echo " -d" >&2 + echo " clone object order double its parent's (format 2)" >&2 + echo " -h" >&2 + echo " clone object order half of its parent's (format 2)" >&2 + echo " -l" >&2 + echo " use local files rather than rbd images" >&2 + echo " -v" >&2 + echo " disable reporting of what's going on" >&2 + echo "" >&2 + exit 1 +} + +function verbose() { + [ "${VERBOSE}" = true ] && echo "$@" + true # Don't let the verbose test spoil our return value +} + +function quiet() { + "$@" 2> /dev/null +} + +function boolean_toggle() { + [ $# -eq 1 ] || exit 99 + test "$1" = "true" && echo false || echo true +} + +function parseargs() { + local opts="o:p:12clv" + local lopts="order:,page_size:,local,clone,verbose" + local parsed + local clone_order_msg + + # use values from environment if available + VERBOSE="${IMAGE_READ_VERBOSE:-${DEFAULT_VERBOSE}}" + TEST_CLONES="${IMAGE_READ_TEST_CLONES:-${DEFAULT_TEST_CLONES}}" + LOCAL_FILES="${IMAGE_READ_LOCAL_FILES:-${DEFAULT_LOCAL_FILES}}" + DOUBLE_ORDER="${IMAGE_READ_DOUBLE_ORDER:-${DEFAULT_DOUBLE_ORDER}}" + HALF_ORDER="${IMAGE_READ_HALF_ORDER:-${DEFAULT_HALF_ORDER}}" + FORMAT="${IMAGE_READ_FORMAT:-${DEFAULT_FORMAT}}" + PAGE_SIZE="${IMAGE_READ_PAGE_SIZE:-${DEFAULT_PAGE_SIZE}}" + 
OBJECT_ORDER="${IMAGE_READ_OBJECT_ORDER:-${DEFAULT_OBJECT_ORDER}}" + + parsed=$(getopt -o "${opts}" -l "${lopts}" -n "${PROGNAME}" -- "$@") || + usage + eval set -- "${parsed}" + while true; do + case "$1" in + -v|--verbose) + VERBOSE=$(boolean_toggle "${VERBOSE}");; + -c|--clone) + TEST_CLONES=$(boolean_toggle "${TEST_CLONES}");; + -d|--double) + DOUBLE_ORDER=$(boolean_toggle "${DOUBLE_ORDER}");; + -h|--half) + HALF_ORDER=$(boolean_toggle "${HALF_ORDER}");; + -l|--local) + LOCAL_FILES=$(boolean_toggle "${LOCAL_FILES}");; + -1|-2) + FORMAT="${1:1}";; + -p|--page_size) + PAGE_SIZE="$2"; shift;; + -o|--order) + OBJECT_ORDER="$2"; shift;; + --) + shift; break;; + *) + err "getopt internal error" + esac + shift + done + [ $# -gt 0 ] && usage "excess arguments ($*)" + + if [ "${TEST_CLONES}" = true ]; then + # If we're using different object orders for clones, + # make sure the limits are updated accordingly. If + # both "half" and "double" are specified, just + # ignore them both. + if [ "${DOUBLE_ORDER}" = true ]; then + if [ "${HALF_ORDER}" = true ]; then + DOUBLE_ORDER=false + HALF_ORDER=false + else + ((MAX_OBJECT_ORDER -= 2)) + fi + elif [ "${HALF_ORDER}" = true ]; then + ((MIN_OBJECT_ORDER += 2)) + fi + fi + + [ "${OBJECT_ORDER}" -lt "${MIN_OBJECT_ORDER}" ] && + usage "object order (${OBJECT_ORDER}) must be" \ + "at least ${MIN_OBJECT_ORDER}" + [ "${OBJECT_ORDER}" -gt "${MAX_OBJECT_ORDER}" ] && + usage "object order (${OBJECT_ORDER}) must be" \ + "at most ${MAX_OBJECT_ORDER}" + + if [ "${TEST_CLONES}" = true ]; then + if [ "${DOUBLE_ORDER}" = true ]; then + ((CLONE1_ORDER = OBJECT_ORDER + 1)) + ((CLONE2_ORDER = OBJECT_ORDER + 2)) + clone_order_msg="double" + elif [ "${HALF_ORDER}" = true ]; then + ((CLONE1_ORDER = OBJECT_ORDER - 1)) + ((CLONE2_ORDER = OBJECT_ORDER - 2)) + clone_order_msg="half of" + else + CLONE1_ORDER="${OBJECT_ORDER}" + CLONE2_ORDER="${OBJECT_ORDER}" + clone_order_msg="the same as" + fi + fi + + [ "${TEST_CLONES}" != true ] || FORMAT=2 + + OBJECT_SIZE=$(echo "2 ^ ${OBJECT_ORDER}" | bc) + OBJECT_PAGES=$(echo "${OBJECT_SIZE} / ${PAGE_SIZE}" | bc) + IMAGE_SIZE=$((2 * 16 * OBJECT_SIZE / (1024 * 1024))) + [ "${IMAGE_SIZE}" -lt 1 ] && IMAGE_SIZE=1 + IMAGE_OBJECTS=$((IMAGE_SIZE * (1024 * 1024) / OBJECT_SIZE)) + + [ "${OBJECT_PAGES}" -lt 4 ] && + usage "object size (${OBJECT_SIZE}) must be" \ + "at least 4 * page size (${PAGE_SIZE})" + + echo "parameters for this run:" + echo " format ${FORMAT} images will be tested" + echo " object order is ${OBJECT_ORDER}, so" \ + "objects are ${OBJECT_SIZE} bytes" + echo " page size is ${PAGE_SIZE} bytes, so" \ + "there are are ${OBJECT_PAGES} pages in an object" + echo " derived image size is ${IMAGE_SIZE} MB, so" \ + "there are ${IMAGE_OBJECTS} objects in an image" + if [ "${TEST_CLONES}" = true ]; then + echo " clone functionality will be tested" + echo " object size for a clone will be ${clone_order_msg}" + echo " the object size of its parent image" + fi + + true # Don't let the clones test spoil our return value +} + +function image_dev_path() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + + if [ "${LOCAL_FILES}" = true ]; then + echo "${TEMP}/${image_name}" + return + fi + + echo "/dev/rbd/rbd/${image_name}" +} + +function out_data_dir() { + [ $# -lt 2 ] || exit 99 + local out_data="${TEMP}/data" + local image_name + + if [ $# -eq 1 ]; then + image_name="$1" + echo "${out_data}/${image_name}" + else + echo "${out_data}" + fi +} + +function setup() { + verbose "===== setting up =====" + TEMP=$(mktemp -d 
/tmp/rbd_image_read.XXXXX) + mkdir -p $(out_data_dir) + + # create and fill the original image with some data + create_image "${ORIGINAL}" + map_image "${ORIGINAL}" + fill_original + + # create a snapshot of the original + create_image_snap "${ORIGINAL}" "${SNAP1}" + map_image_snap "${ORIGINAL}" "${SNAP1}" + + if [ "${TEST_CLONES}" = true ]; then + # create a clone of the original snapshot + create_snap_clone "${ORIGINAL}" "${SNAP1}" \ + "${CLONE1}" "${CLONE1_ORDER}" + map_image "${CLONE1}" + + # create a snapshot of that clone + create_image_snap "${CLONE1}" "${SNAP2}" + map_image_snap "${CLONE1}" "${SNAP2}" + + # create a clone of that clone's snapshot + create_snap_clone "${CLONE1}" "${SNAP2}" \ + "${CLONE2}" "${CLONE2_ORDER}" + map_image "${CLONE2}" + fi +} + +function teardown() { + verbose "===== cleaning up =====" + if [ "${TEST_CLONES}" = true ]; then + unmap_image "${CLONE2}" || true + destroy_snap_clone "${CLONE1}" "${SNAP2}" "${CLONE2}" || true + + unmap_image_snap "${CLONE1}" "${SNAP2}" || true + destroy_image_snap "${CLONE1}" "${SNAP2}" || true + + unmap_image "${CLONE1}" || true + destroy_snap_clone "${ORIGINAL}" "${SNAP1}" "${CLONE1}" || true + fi + unmap_image_snap "${ORIGINAL}" "${SNAP1}" || true + destroy_image_snap "${ORIGINAL}" "${SNAP1}" || true + unmap_image "${ORIGINAL}" || true + destroy_image "${ORIGINAL}" || true + + rm -rf $(out_data_dir) + rmdir "${TEMP}" +} + +function create_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local image_path + local bytes + + verbose "creating image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + image_path=$(image_dev_path "${image_name}") + bytes=$(echo "${IMAGE_SIZE} * 1024 * 1024 - 1" | bc) + quiet dd if=/dev/zero bs=1 count=1 seek="${bytes}" \ + of="${image_path}" + return + fi + + rbd create "${image_name}" --image-format "${FORMAT}" \ + --size "${IMAGE_SIZE}" --order "${OBJECT_ORDER}" \ + --image-shared +} + +function destroy_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local image_path + + verbose "destroying image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + image_path=$(image_dev_path "${image_name}") + rm -f "${image_path}" + return + fi + + rbd rm "${image_name}" +} + +function map_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" # can be image@snap too + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + + sudo rbd map "${image_name}" +} + +function unmap_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" # can be image@snap too + local image_path + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + image_path=$(image_dev_path "${image_name}") + + if [ -e "${image_path}" ]; then + sudo rbd unmap "${image_path}" + fi +} + +function map_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + + image_snap="${image_name}@${snap_name}" + map_image "${image_snap}" +} + +function unmap_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + + image_snap="${image_name}@${snap_name}" + unmap_image "${image_snap}" +} + +function create_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap="${image_name}@${snap_name}" + local image_path + local snap_path + + verbose "creating snapshot \"${snap_name}\"" \ + "of image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + 
image_path=$(image_dev_path "${image_name}") + snap_path=$(image_dev_path "${image_snap}") + + cp "${image_path}" "${snap_path}" + return + fi + + rbd snap create "${image_snap}" +} + +function destroy_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap="${image_name}@${snap_name}" + local snap_path + + verbose "destroying snapshot \"${snap_name}\"" \ + "of image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + snap_path=$(image_dev_path "${image_snap}") + rm -rf "${snap_path}" + return + fi + + rbd snap rm "${image_snap}" +} + +function create_snap_clone() { + [ $# -eq 4 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local clone_name="$3" + local clone_order="$4" + local image_snap="${image_name}@${snap_name}" + local snap_path + local clone_path + + verbose "creating clone image \"${clone_name}\"" \ + "of image snapshot \"${image_name}@${snap_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + snap_path=$(image_dev_path "${image_name}@${snap_name}") + clone_path=$(image_dev_path "${clone_name}") + + cp "${snap_path}" "${clone_path}" + return + fi + + rbd snap protect "${image_snap}" + rbd clone --order "${clone_order}" --image-shared \ + "${image_snap}" "${clone_name}" +} + +function destroy_snap_clone() { + [ $# -eq 3 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local clone_name="$3" + local image_snap="${image_name}@${snap_name}" + local clone_path + + verbose "destroying clone image \"${clone_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + clone_path=$(image_dev_path "${clone_name}") + + rm -rf "${clone_path}" + return + fi + + rbd rm "${clone_name}" + rbd snap unprotect "${image_snap}" +} + +# function that produces "random" data with which to fill the image +function source_data() { + while quiet dd if=/bin/bash skip=$(($$ % 199)) bs="${PAGE_SIZE}"; do + : # Just do the dd + done +} + +function fill_original() { + local image_path=$(image_dev_path "${ORIGINAL}") + + verbose "filling original image" + # Fill 16 objects worth of "random" data + source_data | + quiet dd bs="${PAGE_SIZE}" count=$((16 * OBJECT_PAGES)) \ + of="${image_path}" +} + +function do_read() { + [ $# -eq 3 -o $# -eq 4 ] || exit 99 + local image_name="$1" + local offset="$2" + local length="$3" + [ "${length}" -gt 0 ] || err "do_read: length must be non-zero" + local image_path=$(image_dev_path "${image_name}") + local out_data=$(out_data_dir "${image_name}") + local range=$(printf "%06u~%04u" "${offset}" "${length}") + local out_file + + [ $# -eq 4 ] && offset=$((offset + 16 * OBJECT_PAGES)) + + verbose "reading \"${image_name}\" pages ${range}" + + out_file="${out_data}/pages_${range}" + + quiet dd bs="${PAGE_SIZE}" skip="${offset}" count="${length}" \ + if="${image_path}" of="${out_file}" +} + +function one_pass() { + [ $# -eq 1 -o $# -eq 2 ] || exit 99 + local image_name="$1" + local extended + [ $# -eq 2 ] && extended="true" + local offset + local length + + offset=0 + + # +-----------+-----------+--- + # |X:X:X...X:X| : : ... : | : + # +-----------+-----------+--- + length="${OBJECT_PAGES}" + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : |X: : ... : | : + # ---+-----------+--- + length=1 + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : | :X: ... 
: | : + # ---+-----------+--- + length=1 + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : | : :X...X: | : + # ---+-----------+--- + length=$((OBJECT_PAGES - 3)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : | : : ... :X| : + # ---+-----------+--- + length=1 + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : |X:X:X...X:X| : + # ---+-----------+--- + length="${OBJECT_PAGES}" + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + offset=$((offset + 1)) # skip 1 + + # ---+-----------+--- + # : | :X:X...X:X| : + # ---+-----------+--- + length=$((OBJECT_PAGES - 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : |X:X:X...X:X|X: : ... : | : + # ---+-----------+-----------+--- + length=$((OBJECT_PAGES + 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : | :X:X...X:X|X: : ... : | : + # ---+-----------+-----------+--- + length="${OBJECT_PAGES}" + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : | :X:X...X:X|X:X: ... : | : + # ---+-----------+-----------+--- + length=$((OBJECT_PAGES + 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : | : :X...X:X|X:X:X...X:X| : + # ---+-----------+-----------+--- + length=$((2 * OBJECT_PAGES + 2)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + offset=$((offset + 1)) # skip 1 + + # ---+-----------+-----------+----- + # : | :X:X...X:X|X:X:X...X:X|X: : + # ---+-----------+-----------+----- + length=$((2 * OBJECT_PAGES)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # --+-----------+-----------+-------- + # : | :X:X...X:X|X:X:X...X:X|X:X: : + # --+-----------+-----------+-------- + length=2049 + length=$((2 * OBJECT_PAGES + 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + # offset=$((offset + length)) +} + +function run_using() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local out_data=$(out_data_dir "${image_name}") + + verbose "===== running using \"${image_name}\" =====" + mkdir -p "${out_data}" + one_pass "${image_name}" + one_pass "${image_name}" extended +} + +function compare() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local out_data=$(out_data_dir "${image_name}") + local original=$(out_data_dir "${ORIGINAL}") + + verbose "===== comparing \"${image_name}\" =====" + for i in $(ls "${original}"); do + verbose compare "\"${image_name}\" \"${i}\"" + cmp "${original}/${i}" "${out_data}/${i}" + done + [ "${image_name}" = "${ORIGINAL}" ] || rm -rf "${out_data}" +} + +function doit() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + + run_using "${image_name}" + compare "${image_name}" +} + +########## Start + +parseargs "$@" + +trap teardown EXIT HUP INT +setup + +run_using "${ORIGINAL}" +doit "${ORIGINAL}@${SNAP1}" +if [ "${TEST_CLONES}" = true ]; then + doit "${CLONE1}" + doit "${CLONE1}@${SNAP2}" + doit "${CLONE2}" +fi +rm -rf $(out_data_dir "${ORIGINAL}") + +echo "Success!" 
+ +exit 0 diff --git a/qa/workunits/rbd/import_export.sh b/qa/workunits/rbd/import_export.sh new file mode 100755 index 000000000..89e8d35cf --- /dev/null +++ b/qa/workunits/rbd/import_export.sh @@ -0,0 +1,259 @@ +#!/bin/sh -ex + +# V1 image unsupported but required for testing purposes +export RBD_FORCE_ALLOW_V1=1 + +# returns data pool for a given image +get_image_data_pool () { + image=$1 + data_pool=$(rbd info $image | grep "data_pool: " | awk -F':' '{ print $NF }') + if [ -z $data_pool ]; then + data_pool='rbd' + fi + + echo $data_pool +} + +# return list of object numbers populated in image +objects () { + image=$1 + prefix=$(rbd info $image | grep block_name_prefix | awk '{print $NF;}') + + # strip off prefix and leading zeros from objects; sort, although + # it doesn't necessarily make sense as they're hex, at least it makes + # the list repeatable and comparable + objects=$(rados ls -p $(get_image_data_pool $image) | grep $prefix | \ + sed -e 's/'$prefix'\.//' -e 's/^0*\([0-9a-f]\)/\1/' | sort -u) + echo $objects +} + +# return false if either files don't compare or their ondisk +# sizes don't compare + +compare_files_and_ondisk_sizes () { + cmp -l $1 $2 || return 1 + origsize=$(stat $1 --format %b) + exportsize=$(stat $2 --format %b) + difference=$(($exportsize - $origsize)) + difference=${difference#-} # absolute value + test $difference -ge 0 -a $difference -lt 4096 +} + +TMPDIR=/tmp/rbd_import_export_$$ +rm -rf $TMPDIR +mkdir $TMPDIR +trap "rm -rf $TMPDIR" INT TERM EXIT + +# cannot import a dir +mkdir foo.$$ +rbd import foo.$$ foo.dir && exit 1 || true # should fail +rmdir foo.$$ + +# create a sparse file +dd if=/bin/sh of=${TMPDIR}/img bs=1k count=1 seek=10 +dd if=/bin/dd of=${TMPDIR}/img bs=1k count=10 seek=100 +dd if=/bin/rm of=${TMPDIR}/img bs=1k count=100 seek=1000 +dd if=/bin/ls of=${TMPDIR}/img bs=1k seek=10000 +dd if=/bin/ln of=${TMPDIR}/img bs=1k seek=100000 +dd if=/bin/grep of=${TMPDIR}/img bs=1k seek=1000000 + +rbd rm testimg || true + +rbd import $RBD_CREATE_ARGS ${TMPDIR}/img testimg +rbd export testimg ${TMPDIR}/img2 +rbd export testimg - > ${TMPDIR}/img3 +rbd rm testimg +cmp ${TMPDIR}/img ${TMPDIR}/img2 +cmp ${TMPDIR}/img ${TMPDIR}/img3 +rm ${TMPDIR}/img2 ${TMPDIR}/img3 + +# try again, importing from stdin +rbd import $RBD_CREATE_ARGS - testimg < ${TMPDIR}/img +rbd export testimg ${TMPDIR}/img2 +rbd export testimg - > ${TMPDIR}/img3 +rbd rm testimg +cmp ${TMPDIR}/img ${TMPDIR}/img2 +cmp ${TMPDIR}/img ${TMPDIR}/img3 + +rm ${TMPDIR}/img ${TMPDIR}/img2 ${TMPDIR}/img3 + +if rbd help export | grep -q export-format; then + # try with --export-format for snapshots + dd if=/bin/dd of=${TMPDIR}/img bs=1k count=10 seek=100 + rbd import $RBD_CREATE_ARGS ${TMPDIR}/img testimg + rbd snap create testimg@snap + rbd image-meta set testimg key1 value1 + IMAGEMETA_BEFORE=`rbd image-meta list testimg` + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + rbd info testimg_import + rbd info testimg_import@snap + IMAGEMETA_AFTER=`rbd image-meta list testimg_import` + [ "$IMAGEMETA_BEFORE" = "$IMAGEMETA_AFTER" ] + + # compare the contents between testimg and testimg_import + rbd export testimg_import ${TMPDIR}/img_import + compare_files_and_ondisk_sizes ${TMPDIR}/img ${TMPDIR}/img_import + + rbd export testimg@snap ${TMPDIR}/img_snap + rbd export testimg_import@snap ${TMPDIR}/img_snap_import + compare_files_and_ondisk_sizes ${TMPDIR}/img_snap ${TMPDIR}/img_snap_import + + rm ${TMPDIR}/img_v2 + rm ${TMPDIR}/img_import + 
rm ${TMPDIR}/img_snap + rm ${TMPDIR}/img_snap_import + + rbd snap rm testimg_import@snap + rbd remove testimg_import + rbd snap rm testimg@snap + rbd rm testimg + + # order + rbd import --order 20 ${TMPDIR}/img testimg + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + rbd info testimg_import|grep order|awk '{print $2}'|grep 20 + + rm ${TMPDIR}/img_v2 + + rbd remove testimg_import + rbd remove testimg + + # features + rbd import --image-feature layering ${TMPDIR}/img testimg + FEATURES_BEFORE=`rbd info testimg|grep features` + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + FEATURES_AFTER=`rbd info testimg_import|grep features` + if [ "$FEATURES_BEFORE" != "$FEATURES_AFTER" ]; then + false + fi + + rm ${TMPDIR}/img_v2 + + rbd remove testimg_import + rbd remove testimg + + # stripe + rbd import --stripe-count 1000 --stripe-unit 4096 ${TMPDIR}/img testimg + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + rbd info testimg_import|grep "stripe unit"|grep -Ei '(4 KiB|4096)' + rbd info testimg_import|grep "stripe count"|awk '{print $3}'|grep 1000 + + rm ${TMPDIR}/img_v2 + + rbd remove testimg_import + rbd remove testimg + + # snap protect + rbd import --image-format=2 ${TMPDIR}/img testimg + rbd snap create testimg@snap1 + rbd snap create testimg@snap2 + rbd snap protect testimg@snap2 + rbd export --export-format 2 testimg ${TMPDIR}/snap_protect + rbd import --export-format 2 ${TMPDIR}/snap_protect testimg_import + rbd info testimg_import@snap1 | grep 'protected: False' + rbd info testimg_import@snap2 | grep 'protected: True' + + rm ${TMPDIR}/snap_protect + + rbd snap unprotect testimg@snap2 + rbd snap unprotect testimg_import@snap2 + rbd snap purge testimg + rbd snap purge testimg_import + rbd remove testimg + rbd remove testimg_import +fi + +tiered=0 +if ceph osd dump | grep ^pool | grep "'rbd'" | grep tier; then + tiered=1 +fi + +# create specifically sparse files +# 1 1M block of sparse, 1 1M block of random +dd if=/dev/urandom bs=1M seek=1 count=1 of=${TMPDIR}/sparse1 + +# 1 1M block of random, 1 1M block of sparse +dd if=/dev/urandom bs=1M count=1 of=${TMPDIR}/sparse2; truncate ${TMPDIR}/sparse2 -s 2M + +# 1M-block images; validate resulting blocks + +# 1M sparse, 1M data +rbd rm sparse1 || true +rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse1 +rbd ls -l | grep sparse1 | grep -Ei '(2 MiB|2048k)' +[ $tiered -eq 1 -o "$(objects sparse1)" = '1' ] + +# export, compare contents and on-disk size +rbd export sparse1 ${TMPDIR}/sparse1.out +compare_files_and_ondisk_sizes ${TMPDIR}/sparse1 ${TMPDIR}/sparse1.out +rm ${TMPDIR}/sparse1.out +rbd rm sparse1 + +# 1M data, 1M sparse +rbd rm sparse2 || true +rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse2 +rbd ls -l | grep sparse2 | grep -Ei '(2 MiB|2048k)' +[ $tiered -eq 1 -o "$(objects sparse2)" = '0' ] +rbd export sparse2 ${TMPDIR}/sparse2.out +compare_files_and_ondisk_sizes ${TMPDIR}/sparse2 ${TMPDIR}/sparse2.out +rm ${TMPDIR}/sparse2.out +rbd rm sparse2 + +# extend sparse1 to 10 1M blocks, sparse at the end +truncate ${TMPDIR}/sparse1 -s 10M +# import from stdin just for fun, verify still sparse +rbd import $RBD_CREATE_ARGS --order 20 - sparse1 < ${TMPDIR}/sparse1 +rbd ls -l | grep sparse1 | grep -Ei '(10 MiB|10240k)' +[ $tiered -eq 1 -o "$(objects sparse1)" = '1' ] +rbd export sparse1 ${TMPDIR}/sparse1.out 
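+# content must be identical and the allocated block counts must differ by
+# less than 4096 (see compare_files_and_ondisk_sizes above)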
+compare_files_and_ondisk_sizes ${TMPDIR}/sparse1 ${TMPDIR}/sparse1.out +rm ${TMPDIR}/sparse1.out +rbd rm sparse1 + +# extend sparse2 to 4M total with two more nonsparse megs +dd if=/dev/urandom bs=2M count=1 of=${TMPDIR}/sparse2 oflag=append conv=notrunc +# again from stding +rbd import $RBD_CREATE_ARGS --order 20 - sparse2 < ${TMPDIR}/sparse2 +rbd ls -l | grep sparse2 | grep -Ei '(4 MiB|4096k)' +[ $tiered -eq 1 -o "$(objects sparse2)" = '0 2 3' ] +rbd export sparse2 ${TMPDIR}/sparse2.out +compare_files_and_ondisk_sizes ${TMPDIR}/sparse2 ${TMPDIR}/sparse2.out +rm ${TMPDIR}/sparse2.out +rbd rm sparse2 + +# zeros import to a sparse image. Note: all zeros currently +# doesn't work right now due to the way we handle 'empty' fiemaps; +# the image ends up zero-filled. + +echo "partially-sparse file imports to partially-sparse image" +rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse1 sparse +[ $tiered -eq 1 -o "$(objects sparse)" = '1' ] +rbd rm sparse + +echo "zeros import through stdin to sparse image" +# stdin +dd if=/dev/zero bs=1M count=4 | rbd import $RBD_CREATE_ARGS - sparse +[ $tiered -eq 1 -o "$(objects sparse)" = '' ] +rbd rm sparse + +echo "zeros export to sparse file" +# Must be tricky to make image "by hand" ; import won't create a zero image +rbd create $RBD_CREATE_ARGS sparse --size 4 +prefix=$(rbd info sparse | grep block_name_prefix | awk '{print $NF;}') +# drop in 0 object directly +dd if=/dev/zero bs=4M count=1 | rados -p $(get_image_data_pool sparse) \ + put ${prefix}.000000000000 - +[ $tiered -eq 1 -o "$(objects sparse)" = '0' ] +# 1 object full of zeros; export should still create 0-disk-usage file +rm ${TMPDIR}/sparse || true +rbd export sparse ${TMPDIR}/sparse +[ $(stat ${TMPDIR}/sparse --format=%b) = '0' ] +rbd rm sparse + +rm ${TMPDIR}/sparse ${TMPDIR}/sparse1 ${TMPDIR}/sparse2 ${TMPDIR}/sparse3 || true + +echo OK diff --git a/qa/workunits/rbd/issue-20295.sh b/qa/workunits/rbd/issue-20295.sh new file mode 100755 index 000000000..3d617a066 --- /dev/null +++ b/qa/workunits/rbd/issue-20295.sh @@ -0,0 +1,18 @@ +#!/bin/sh -ex + +TEST_POOL=ecpool +TEST_IMAGE=test1 +PGS=12 + +ceph osd pool create $TEST_POOL $PGS $PGS erasure +ceph osd pool application enable $TEST_POOL rbd +ceph osd pool set $TEST_POOL allow_ec_overwrites true +rbd --data-pool $TEST_POOL create --size 1024G $TEST_IMAGE +rbd bench \ + --io-type write \ + --io-size 4096 \ + --io-pattern=rand \ + --io-total 100M \ + $TEST_IMAGE + +echo "OK" diff --git a/qa/workunits/rbd/journal.sh b/qa/workunits/rbd/journal.sh new file mode 100755 index 000000000..ba89e75c9 --- /dev/null +++ b/qa/workunits/rbd/journal.sh @@ -0,0 +1,326 @@ +#!/usr/bin/env bash +set -e + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +function list_tests() +{ + echo "AVAILABLE TESTS" + for i in $TESTS; do + echo " $i" + done +} + +function usage() +{ + echo "usage: $0 [-h|-l|-t <testname> [-t <testname>...] 
[--no-cleanup]]" +} + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function save_commit_position() +{ + local journal=$1 + + rados -p rbd getomapval journal.${journal} client_ \ + $TMPDIR/${journal}.client_.omap +} + +function restore_commit_position() +{ + local journal=$1 + + rados -p rbd setomapval journal.${journal} client_ \ + < $TMPDIR/${journal}.client_.omap +} + +test_rbd_journal() +{ + local image=testrbdjournal$$ + + rbd create --image-feature exclusive-lock --image-feature journaling \ + --size 128 ${image} + local journal=$(rbd info ${image} --format=xml 2>/dev/null | + $XMLSTARLET sel -t -v "//image/journal") + test -n "${journal}" + rbd journal info ${journal} + rbd journal info --journal ${journal} + rbd journal info --image ${image} + + rbd feature disable ${image} journaling + + rbd info ${image} --format=xml 2>/dev/null | + expect_false $XMLSTARLET sel -t -v "//image/journal" + expect_false rbd journal info ${journal} + expect_false rbd journal info --image ${image} + + rbd feature enable ${image} journaling + + local journal1=$(rbd info ${image} --format=xml 2>/dev/null | + $XMLSTARLET sel -t -v "//image/journal") + test "${journal}" = "${journal1}" + + rbd journal info ${journal} + + rbd journal status ${journal} + + local count=10 + save_commit_position ${journal} + rbd bench --io-type write ${image} --io-size 4096 --io-threads 1 \ + --io-total $((4096 * count)) --io-pattern seq + rbd journal status --image ${image} | fgrep "tid=$((count - 1))" + restore_commit_position ${journal} + rbd journal status --image ${image} | fgrep "positions=[]" + local count1=$(rbd journal inspect --verbose ${journal} | + grep -c 'event_type.*AioWrite') + test "${count}" -eq "${count1}" + + rbd journal export ${journal} $TMPDIR/journal.export + local size=$(stat -c "%s" $TMPDIR/journal.export) + test "${size}" -gt 0 + + rbd export ${image} $TMPDIR/${image}.export + + local image1=${image}1 + rbd create --image-feature exclusive-lock --image-feature journaling \ + --size 128 ${image1} + journal1=$(rbd info ${image1} --format=xml 2>/dev/null | + $XMLSTARLET sel -t -v "//image/journal") + + save_commit_position ${journal1} + rbd journal import --dest ${image1} $TMPDIR/journal.export + rbd snap create ${image1}@test + restore_commit_position ${journal1} + # check that commit position is properly updated: the journal should contain + # 14 entries (2 AioFlush + 10 AioWrite + 1 SnapCreate + 1 OpFinish) and + # commit position set to tid=14 + rbd journal inspect --image ${image1} --verbose | awk ' + /AioFlush/ {a++} # match: "event_type": "AioFlush", + /AioWrite/ {w++} # match: "event_type": "AioWrite", + /SnapCreate/ {s++} # match: "event_type": "SnapCreate", + /OpFinish/ {f++} # match: "event_type": "OpFinish", + /entries inspected/ {t=$1; e=$4} # match: 14 entries inspected, 0 errors + {print} # for diagnostic + END { + if (a != 2 || w != 10 || s != 1 || f != 1 || t != 14 || e != 0) exit(1) + } + ' + + rbd export ${image1}@test $TMPDIR/${image1}.export + cmp $TMPDIR/${image}.export $TMPDIR/${image1}.export + + rbd journal reset ${journal} + + rbd journal inspect --verbose ${journal} | expect_false grep 'event_type' + + rbd snap purge ${image1} + rbd remove ${image1} + rbd remove ${image} +} + + +rbd_assert_eq() { + local image=$1 + local cmd=$2 + local param=$3 + local expected_val=$4 + + local val=$(rbd --format xml ${cmd} --image ${image} | + $XMLSTARLET sel -t -v "${param}") + test "${val}" = "${expected_val}" +} + +test_rbd_create() +{ + local 
image=testrbdcreate$$ + + rbd create --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + --size 256 ${image} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +test_rbd_copy() +{ + local src=testrbdcopys$$ + rbd create --size 256 ${src} + + local image=testrbdcopy$$ + rbd copy --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + ${src} ${image} + + rbd remove ${src} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +test_rbd_deep_copy() +{ + local src=testrbdcopys$$ + rbd create --size 256 ${src} + rbd snap create ${src}@snap1 + + local dest=testrbdcopy$$ + rbd deep copy --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + ${src} ${dest} + + rbd snap purge ${src} + rbd remove ${src} + + rbd_assert_eq ${dest} 'journal info' '//journal/order' 25 + rbd_assert_eq ${dest} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${dest} 'journal info' '//journal/object_pool' rbd + + rbd snap purge ${dest} + rbd remove ${dest} +} + +test_rbd_clone() +{ + local parent=testrbdclonep$$ + rbd create --image-feature layering --size 256 ${parent} + rbd snap create ${parent}@snap + rbd snap protect ${parent}@snap + + local image=testrbdclone$$ + rbd clone --image-feature layering --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + ${parent}@snap ${image} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} + rbd snap unprotect ${parent}@snap + rbd snap purge ${parent} + rbd remove ${parent} +} + +test_rbd_import() +{ + local src=testrbdimports$$ + rbd create --size 256 ${src} + + rbd export ${src} $TMPDIR/${src}.export + rbd remove ${src} + + local image=testrbdimport$$ + rbd import --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + $TMPDIR/${src}.export ${image} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +test_rbd_feature() +{ + local image=testrbdfeature$$ + + rbd create --image-feature exclusive-lock --size 256 ${image} + + rbd feature enable ${image} journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +TESTS+=" rbd_journal" +TESTS+=" rbd_create" +TESTS+=" rbd_copy" +TESTS+=" rbd_clone" +TESTS+=" rbd_import" +TESTS+=" rbd_feature" + +# +# "main" follows +# + +tests_to_run=() + +cleanup=true + +while [[ $# 
-gt 0 ]]; do + opt=$1 + + case "$opt" in + "-l" ) + do_list=1 + ;; + "--no-cleanup" ) + cleanup=false + ;; + "-t" ) + shift + if [[ -z "$1" ]]; then + echo "missing argument to '-t'" + usage ; + exit 1 + fi + tests_to_run+=" $1" + ;; + "-h" ) + usage ; + exit 0 + ;; + esac + shift +done + +if [[ $do_list -eq 1 ]]; then + list_tests ; + exit 0 +fi + +TMPDIR=/tmp/rbd_journal$$ +mkdir $TMPDIR +if $cleanup; then + trap "rm -fr $TMPDIR" 0 +fi + +if test -z "$tests_to_run" ; then + tests_to_run="$TESTS" +fi + +for i in $tests_to_run; do + set -x + test_${i} + set +x +done + +echo OK diff --git a/qa/workunits/rbd/kernel.sh b/qa/workunits/rbd/kernel.sh new file mode 100755 index 000000000..faa5760ee --- /dev/null +++ b/qa/workunits/rbd/kernel.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +set -ex + +CEPH_SECRET_FILE=${CEPH_SECRET_FILE:-} +CEPH_ID=${CEPH_ID:-admin} +SECRET_ARGS='' +if [ ! -z $CEPH_SECRET_FILE ]; then + SECRET_ARGS="--secret $CEPH_SECRET_FILE" +fi + +TMP_FILES="/tmp/img1 /tmp/img1.small /tmp/img1.snap1 /tmp/img1.export /tmp/img1.trunc" + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function get_device_dir { + local POOL=$1 + local IMAGE=$2 + local SNAP=$3 + rbd device list | tail -n +2 | egrep "\s+$POOL\s+$IMAGE\s+$SNAP\s+" | + awk '{print $1;}' +} + +function clean_up { + [ -e /dev/rbd/rbd/testimg1@snap1 ] && + sudo rbd device unmap /dev/rbd/rbd/testimg1@snap1 + if [ -e /dev/rbd/rbd/testimg1 ]; then + sudo rbd device unmap /dev/rbd/rbd/testimg1 + rbd snap purge testimg1 || true + fi + rbd ls | grep testimg1 > /dev/null && rbd rm testimg1 || true + sudo rm -f $TMP_FILES +} + +clean_up + +trap clean_up INT TERM EXIT + +# create an image +dd if=/bin/sh of=/tmp/img1 bs=1k count=1 seek=10 +dd if=/bin/dd of=/tmp/img1 bs=1k count=10 seek=100 +dd if=/bin/rm of=/tmp/img1 bs=1k count=100 seek=1000 +dd if=/bin/ls of=/tmp/img1 bs=1k seek=10000 +dd if=/bin/ln of=/tmp/img1 bs=1k seek=100000 +dd if=/dev/zero of=/tmp/img1 count=0 seek=150000 + +# import +rbd import /tmp/img1 testimg1 +sudo rbd device map testimg1 --user $CEPH_ID $SECRET_ARGS + +DEV_ID1=$(get_device_dir rbd testimg1 -) +echo "dev_id1 = $DEV_ID1" +cat /sys/bus/rbd/devices/$DEV_ID1/size +cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 76800000 + +sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.export +cmp /tmp/img1 /tmp/img1.export + +# snapshot +rbd snap create testimg1 --snap=snap1 +sudo rbd device map --snap=snap1 testimg1 --user $CEPH_ID $SECRET_ARGS + +DEV_ID2=$(get_device_dir rbd testimg1 snap1) +cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000 + +sudo dd if=/dev/rbd/rbd/testimg1@snap1 of=/tmp/img1.snap1 +cmp /tmp/img1 /tmp/img1.snap1 + +# resize +rbd resize testimg1 --size=40 --allow-shrink +cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 41943040 +cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000 + +sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.small +cp /tmp/img1 /tmp/img1.trunc +truncate -s 41943040 /tmp/img1.trunc +cmp /tmp/img1.trunc /tmp/img1.small + +# rollback expects an unlocked image +# (acquire and) release the lock as a side effect +rbd bench --io-type read --io-size 1 --io-threads 1 --io-total 1 testimg1 + +# rollback and check data again +rbd snap rollback --snap=snap1 testimg1 +cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 76800000 +cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000 +sudo rm -f /tmp/img1.snap1 /tmp/img1.export + +sudo dd if=/dev/rbd/rbd/testimg1@snap1 of=/tmp/img1.snap1 +cmp /tmp/img1 /tmp/img1.snap1 +sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.export +cmp 
/tmp/img1 /tmp/img1.export + +# zeros are returned if an image or a snapshot is removed +expect_false cmp -n 76800000 /dev/rbd/rbd/testimg1@snap1 /dev/zero +rbd snap rm --snap=snap1 testimg1 +cmp -n 76800000 /dev/rbd/rbd/testimg1@snap1 /dev/zero + +echo OK diff --git a/qa/workunits/rbd/krbd_data_pool.sh b/qa/workunits/rbd/krbd_data_pool.sh new file mode 100755 index 000000000..8eada88bb --- /dev/null +++ b/qa/workunits/rbd/krbd_data_pool.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash + +set -ex + +export RBD_FORCE_ALLOW_V1=1 + +function fill_image() { + local spec=$1 + + local dev + dev=$(sudo rbd map $spec) + xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 -W 0 $IMAGE_SIZE" $dev + sudo rbd unmap $dev +} + +function create_clones() { + local spec=$1 + + rbd snap create $spec@snap + rbd snap protect $spec@snap + + local pool=${spec%/*} # pool/image is assumed + local image=${spec#*/} + local child_pool + for child_pool in $pool clonesonly; do + rbd clone $spec@snap $child_pool/$pool-$image-clone1 + rbd clone $spec@snap --data-pool repdata $child_pool/$pool-$image-clone2 + rbd clone $spec@snap --data-pool ecdata $child_pool/$pool-$image-clone3 + done +} + +function trigger_copyup() { + local spec=$1 + + local dev + dev=$(sudo rbd map $spec) + local i + { + for ((i = 0; i < $NUM_OBJECTS; i++)); do + echo pwrite -b $OBJECT_SIZE -S 0x59 $((i * OBJECT_SIZE + OBJECT_SIZE / 2)) $((OBJECT_SIZE / 2)) + done + echo fsync + echo quit + } | xfs_io $dev + sudo rbd unmap $dev +} + +function compare() { + local spec=$1 + local object=$2 + + local dev + dev=$(sudo rbd map $spec) + local i + for ((i = 0; i < $NUM_OBJECTS; i++)); do + dd if=$dev bs=$OBJECT_SIZE count=1 skip=$i | cmp $object - + done + sudo rbd unmap $dev +} + +function mkfs_and_mount() { + local spec=$1 + + local dev + dev=$(sudo rbd map $spec) + blkdiscard $dev + mkfs.ext4 -q -E nodiscard $dev + sudo mount $dev /mnt + sudo umount /mnt + sudo rbd unmap $dev +} + +function list_HEADs() { + local pool=$1 + + rados -p $pool ls | while read obj; do + if rados -p $pool stat $obj >/dev/null 2>&1; then + echo $obj + fi + done +} + +function count_data_objects() { + local spec=$1 + + local pool + pool=$(rbd info $spec | grep 'data_pool: ' | awk '{ print $NF }') + if [[ -z $pool ]]; then + pool=${spec%/*} # pool/image is assumed + fi + + local prefix + prefix=$(rbd info $spec | grep 'block_name_prefix: ' | awk '{ print $NF }') + rados -p $pool ls | grep -c $prefix +} + +function get_num_clones() { + local pool=$1 + + rados -p $pool --format=json df | + python3 -c 'import sys, json; print(json.load(sys.stdin)["pools"][0]["num_object_clones"])' +} + +ceph osd pool create repdata 24 24 +rbd pool init repdata +ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2 +ceph osd pool create ecdata 24 24 erasure teuthologyprofile +rbd pool init ecdata +ceph osd pool set ecdata allow_ec_overwrites true +ceph osd pool create rbdnonzero 24 24 +rbd pool init rbdnonzero +ceph osd pool create clonesonly 24 24 +rbd pool init clonesonly + +for pool in rbd rbdnonzero; do + rbd create --size 200 --image-format 1 $pool/img0 + rbd create --size 200 $pool/img1 + rbd create --size 200 --data-pool repdata $pool/img2 + rbd create --size 200 --data-pool ecdata $pool/img3 +done + +IMAGE_SIZE=$(rbd info --format=json img1 | python3 -c 'import sys, json; print(json.load(sys.stdin)["size"])') +OBJECT_SIZE=$(rbd info --format=json img1 | python3 -c 'import sys, json; print(json.load(sys.stdin)["object_size"])') +NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE)) +[[ 
$((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]] + +OBJECT_X=$(mktemp) # xxxx +xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 0 $OBJECT_SIZE" $OBJECT_X + +OBJECT_XY=$(mktemp) # xxYY +xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 0 $((OBJECT_SIZE / 2))" \ + -c "pwrite -b $OBJECT_SIZE -S 0x59 $((OBJECT_SIZE / 2)) $((OBJECT_SIZE / 2))" \ + $OBJECT_XY + +for pool in rbd rbdnonzero; do + for i in {0..3}; do + fill_image $pool/img$i + if [[ $i -ne 0 ]]; then + create_clones $pool/img$i + for child_pool in $pool clonesonly; do + for j in {1..3}; do + trigger_copyup $child_pool/$pool-img$i-clone$j + done + done + fi + done +done + +# rbd_directory, rbd_children, rbd_info + img0 header + ... +NUM_META_RBDS=$((3 + 1 + 3 * (1*2 + 3*2))) +# rbd_directory, rbd_children, rbd_info + ... +NUM_META_CLONESONLY=$((3 + 2 * 3 * (3*2))) + +[[ $(rados -p rbd ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(rados -p repdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(rados -p ecdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(rados -p rbdnonzero ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(rados -p clonesonly ls | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]] + +for pool in rbd rbdnonzero; do + for i in {0..3}; do + [[ $(count_data_objects $pool/img$i) -eq $NUM_OBJECTS ]] + if [[ $i -ne 0 ]]; then + for child_pool in $pool clonesonly; do + for j in {1..3}; do + [[ $(count_data_objects $child_pool/$pool-img$i-clone$j) -eq $NUM_OBJECTS ]] + done + done + fi + done +done + +[[ $(get_num_clones rbd) -eq 0 ]] +[[ $(get_num_clones repdata) -eq 0 ]] +[[ $(get_num_clones ecdata) -eq 0 ]] +[[ $(get_num_clones rbdnonzero) -eq 0 ]] +[[ $(get_num_clones clonesonly) -eq 0 ]] + +for pool in rbd rbdnonzero; do + for i in {0..3}; do + compare $pool/img$i $OBJECT_X + mkfs_and_mount $pool/img$i + if [[ $i -ne 0 ]]; then + for child_pool in $pool clonesonly; do + for j in {1..3}; do + compare $child_pool/$pool-img$i-clone$j $OBJECT_XY + done + done + fi + done +done + +# mkfs_and_mount should discard some objects everywhere but in clonesonly +[[ $(list_HEADs rbd | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(list_HEADs repdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(list_HEADs ecdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(list_HEADs rbdnonzero | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(list_HEADs clonesonly | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]] + +[[ $(get_num_clones rbd) -eq $NUM_OBJECTS ]] +[[ $(get_num_clones repdata) -eq $((2 * NUM_OBJECTS)) ]] +[[ $(get_num_clones ecdata) -eq $((2 * NUM_OBJECTS)) ]] +[[ $(get_num_clones rbdnonzero) -eq $NUM_OBJECTS ]] +[[ $(get_num_clones clonesonly) -eq 0 ]] + +echo OK diff --git a/qa/workunits/rbd/krbd_exclusive_option.sh b/qa/workunits/rbd/krbd_exclusive_option.sh new file mode 100755 index 000000000..f8493ce98 --- /dev/null +++ b/qa/workunits/rbd/krbd_exclusive_option.sh @@ -0,0 +1,233 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function assert_locked() { + local dev_id="${1#/dev/rbd}" + + local client_addr + client_addr="$(< $SYSFS_DIR/$dev_id/client_addr)" + + local client_id + client_id="$(< $SYSFS_DIR/$dev_id/client_id)" + # client4324 -> client.4324 + client_id="client.${client_id#client}" + + local watch_cookie + watch_cookie="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID | + grep $client_id | cut -d ' ' -f 3 | cut -d '=' -f 2)" + [[ $(echo -n "$watch_cookie" | grep -c '^') -eq 1 ]] + + local actual + 
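+    # both the actual and the expected JSON are normalized with
+    # "python3 -m json.tool --sort-keys" so that the string comparison
+    # below does not depend on key order or whitespace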
actual="$(rados -p rbd --format=json lock info rbd_header.$IMAGE_ID rbd_lock | + python3 -m json.tool --sort-keys)" + + local expected + expected="$(cat <<EOF | python3 -m json.tool --sort-keys +{ + "lockers": [ + { + "addr": "$client_addr", + "cookie": "auto $watch_cookie", + "description": "", + "expiration": "0.000000", + "name": "$client_id" + } + ], + "name": "rbd_lock", + "tag": "internal", + "type": "exclusive" +} +EOF + )" + + [ "$actual" = "$expected" ] +} + +function assert_unlocked() { + rados -p rbd --format=json lock info rbd_header.$IMAGE_ID rbd_lock | + grep '"lockers":\[\]' +} + +function blocklist_add() { + local dev_id="${1#/dev/rbd}" + + local client_addr + client_addr="$(< $SYSFS_DIR/$dev_id/client_addr)" + + ceph osd blocklist add $client_addr +} + +SYSFS_DIR="/sys/bus/rbd/devices" +IMAGE_NAME="exclusive-option-test" + +rbd create --size 1 --image-feature '' $IMAGE_NAME + +IMAGE_ID="$(rbd info --format=json $IMAGE_NAME | + python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'].split('.')[1])")" + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_unlocked +sudo rbd unmap $DEV +assert_unlocked + +expect_false sudo rbd map -o exclusive $IMAGE_NAME +assert_unlocked + +expect_false sudo rbd map -o lock_on_read $IMAGE_NAME +assert_unlocked + +rbd feature enable $IMAGE_NAME exclusive-lock +rbd snap create $IMAGE_NAME@snap + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +[[ $(blockdev --getro $DEV) -eq 0 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME@snap) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o ro $IMAGE_NAME) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +[[ $(blockdev --getro $DEV) -eq 0 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME@snap) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive,ro $IMAGE_NAME) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +# alternate syntax +DEV=$(sudo rbd map --exclusive --read-only $IMAGE_NAME) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +OTHER_DEV=$(sudo rbd map -o noshare $IMAGE_NAME) +assert_locked $OTHER_DEV +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $DEV +dd if=/dev/urandom of=$OTHER_DEV bs=4k count=10 oflag=direct +assert_locked $OTHER_DEV +sudo rbd unmap $DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +OTHER_DEV=$(sudo rbd map -o noshare,exclusive $IMAGE_NAME) +assert_locked $OTHER_DEV +dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +expect_false dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $OTHER_DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked +dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +assert_unlocked +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o lock_on_read $IMAGE_NAME) +assert_locked $DEV +OTHER_DEV=$(sudo rbd map -o noshare,exclusive $IMAGE_NAME) +assert_locked $OTHER_DEV +expect_false dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +expect_false dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +sudo 
udevadm settle +assert_locked $OTHER_DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked +dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +assert_locked $DEV +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +expect_false sudo rbd map -o noshare $IMAGE_NAME +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +expect_false sudo rbd map -o noshare,exclusive $IMAGE_NAME +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +rbd resize --size 1G $IMAGE_NAME +assert_unlocked +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +expect_false rbd resize --size 2G $IMAGE_NAME +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +{ sleep 10; blocklist_add $DEV; } & +PID=$! +expect_false dd if=/dev/urandom of=$DEV bs=4k count=200000 oflag=direct +wait $PID +# break lock +OTHER_DEV=$(sudo rbd map -o noshare $IMAGE_NAME) +assert_locked $OTHER_DEV +sudo rbd unmap $DEV +assert_locked $OTHER_DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked + +# induce a watch error after 30 seconds +DEV=$(sudo rbd map -o exclusive,osdkeepalive=60 $IMAGE_NAME) +assert_locked $DEV +OLD_WATCHER="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID)" +sleep 40 +assert_locked $DEV +NEW_WATCHER="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID)" +# same client_id, old cookie < new cookie +[ "$(echo "$OLD_WATCHER" | cut -d ' ' -f 2)" = \ + "$(echo "$NEW_WATCHER" | cut -d ' ' -f 2)" ] +[[ $(echo "$OLD_WATCHER" | cut -d ' ' -f 3 | cut -d '=' -f 2) -lt \ + $(echo "$NEW_WATCHER" | cut -d ' ' -f 3 | cut -d '=' -f 2) ]] +sudo rbd unmap $DEV +assert_unlocked + +echo OK diff --git a/qa/workunits/rbd/krbd_fallocate.sh b/qa/workunits/rbd/krbd_fallocate.sh new file mode 100755 index 000000000..79efa1a8b --- /dev/null +++ b/qa/workunits/rbd/krbd_fallocate.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash + +# - fallocate -z deallocates because BLKDEV_ZERO_NOUNMAP hint is ignored by +# krbd +# +# - big unaligned blkdiscard and fallocate -z/-p leave the objects in place + +set -ex + +# no blkdiscard(8) in trusty +function py_blkdiscard() { + local offset=$1 + + python3 <<EOF +import fcntl, struct +BLKDISCARD = 0x1277 +with open('$DEV', 'w') as dev: + fcntl.ioctl(dev, BLKDISCARD, struct.pack('QQ', $offset, $IMAGE_SIZE - $offset)) +EOF +} + +# fallocate(1) in trusty doesn't support -z/-p +function py_fallocate() { + local mode=$1 + local offset=$2 + + python3 <<EOF +import os, ctypes, ctypes.util +FALLOC_FL_KEEP_SIZE = 0x01 +FALLOC_FL_PUNCH_HOLE = 0x02 +FALLOC_FL_ZERO_RANGE = 0x10 +libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) +with open('$DEV', 'w') as dev: + if libc.fallocate(dev.fileno(), ctypes.c_int($mode), ctypes.c_long($offset), ctypes.c_long($IMAGE_SIZE - $offset)): + err = ctypes.get_errno() + raise OSError(err, os.strerror(err)) +EOF +} + +function allocate() { + xfs_io -c "pwrite -b $OBJECT_SIZE -W 0 $IMAGE_SIZE" $DEV + assert_allocated +} + +function assert_allocated() { + cmp <(od -xAx $DEV) - <<EOF +000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd +* +$(printf %x $IMAGE_SIZE) +EOF + [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $NUM_OBJECTS ]] +} + +function assert_zeroes() { + local 
num_objects_expected=$1 + + cmp <(od -xAx $DEV) - <<EOF +000000 0000 0000 0000 0000 0000 0000 0000 0000 +* +$(printf %x $IMAGE_SIZE) +EOF + [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $num_objects_expected ]] +} + +function assert_zeroes_unaligned() { + local num_objects_expected=$1 + + cmp <(od -xAx $DEV) - <<EOF +000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd +* +$(printf %x $((OBJECT_SIZE / 2))) 0000 0000 0000 0000 0000 0000 0000 0000 +* +$(printf %x $IMAGE_SIZE) +EOF + [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $num_objects_expected ]] + for ((i = 0; i < $num_objects_expected; i++)); do + rados -p rbd stat rbd_data.$IMAGE_ID.$(printf %016x $i) | egrep "(size $((OBJECT_SIZE / 2)))|(size 0)" + done +} + +IMAGE_NAME="fallocate-test" + +rbd create --size 200 $IMAGE_NAME + +IMAGE_SIZE=$(rbd info --format=json $IMAGE_NAME | python3 -c 'import sys, json; print(json.load(sys.stdin)["size"])') +OBJECT_SIZE=$(rbd info --format=json $IMAGE_NAME | python3 -c 'import sys, json; print(json.load(sys.stdin)["object_size"])') +NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE)) +[[ $((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]] + +IMAGE_ID="$(rbd info --format=json $IMAGE_NAME | + python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'].split('.')[1])")" + +DEV=$(sudo rbd map $IMAGE_NAME) + +# make sure -ENOENT is hidden +assert_zeroes 0 +py_blkdiscard 0 +assert_zeroes 0 + +# blkdev_issue_discard +allocate +py_blkdiscard 0 +assert_zeroes 0 + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP +allocate +py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE 0 +assert_zeroes 0 + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK +allocate +py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE 0 +assert_zeroes 0 + +# unaligned blkdev_issue_discard +allocate +py_blkdiscard $((OBJECT_SIZE / 2)) +assert_zeroes_unaligned $NUM_OBJECTS + +# unaligned blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP +allocate +py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE $((OBJECT_SIZE / 2)) +assert_zeroes_unaligned $NUM_OBJECTS + +# unaligned blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK +allocate +py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE $((OBJECT_SIZE / 2)) +assert_zeroes_unaligned $NUM_OBJECTS + +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o notrim $IMAGE_NAME) + +# blkdev_issue_discard +allocate +py_blkdiscard 0 |& grep 'Operation not supported' +assert_allocated + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP +allocate +py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE 0 +assert_zeroes $NUM_OBJECTS + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK +allocate +py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE 0 |& grep 'Operation not supported' +assert_allocated + +sudo rbd unmap $DEV + +echo OK diff --git a/qa/workunits/rbd/krbd_huge_osdmap.sh b/qa/workunits/rbd/krbd_huge_osdmap.sh new file mode 100755 index 000000000..0a550d674 --- /dev/null +++ b/qa/workunits/rbd/krbd_huge_osdmap.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/40481. +# +# An osdmap with 60000 slots encodes to ~16M, of which the ignored portion +# is ~13M. However in-memory osdmap is larger than ~3M: in-memory osd_addr +# array for 60000 OSDs is ~8M because of sockaddr_storage. +# +# Set mon_max_osd = 60000 in ceph.conf. 
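+#
+# Rough arithmetic behind the figures above, as an illustration (assuming
+# sockaddr_storage is 128 bytes): 60000 OSDs * 128 bytes is ~7.3M for the
+# osd_addr array alone, already more than the ~3M non-ignored portion of
+# the encoded map.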
+ +set -ex + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function run_test() { + local dev + + # initially tiny, grow via incrementals + dev=$(sudo rbd map img) + for max in 8 60 600 6000 60000; do + ceph osd setmaxosd $max + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + done + ceph osd getcrushmap -o /dev/stdout | ceph osd setcrushmap -i /dev/stdin + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + sudo rbd unmap $dev + + # initially huge, shrink via incrementals + dev=$(sudo rbd map img) + for max in 60000 6000 600 60 8; do + ceph osd setmaxosd $max + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + done + ceph osd getcrushmap -o /dev/stdout | ceph osd setcrushmap -i /dev/stdin + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + sudo rbd unmap $dev +} + +rbd create --size 12M img +run_test +# repeat with primary affinity (adds an extra array) +ceph osd primary-affinity osd.0 0.5 +run_test + +echo OK diff --git a/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh b/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh new file mode 100755 index 000000000..f70f38639 --- /dev/null +++ b/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -ex + +function run_test() { + ceph osd pool create foo 12 + rbd pool init foo + rbd create --size 1 foo/img + + local dev + dev=$(sudo rbd map foo/img) + sudo rbd unmap $dev + + ceph osd pool delete foo foo --yes-i-really-really-mean-it +} + +NUM_ITER=20 + +for ((i = 0; i < $NUM_ITER; i++)); do + run_test +done + +rbd create --size 1 img +DEV=$(sudo rbd map img) +for ((i = 0; i < $NUM_ITER; i++)); do + run_test +done +sudo rbd unmap $DEV + +echo OK diff --git a/qa/workunits/rbd/krbd_namespaces.sh b/qa/workunits/rbd/krbd_namespaces.sh new file mode 100755 index 000000000..0273d8499 --- /dev/null +++ b/qa/workunits/rbd/krbd_namespaces.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +set -ex + +function get_block_name_prefix() { + rbd info --format=json $1 | python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'])" +} + +function do_pwrite() { + local spec=$1 + local old_byte=$2 + local new_byte=$3 + + local dev + dev=$(sudo rbd map $spec) + cmp <(dd if=/dev/zero bs=1M count=10 | tr \\000 \\$old_byte) $dev + xfs_io -c "pwrite -b 1M -S $new_byte 0 10M" $dev + sudo rbd unmap $dev +} + +function do_cmp() { + local spec=$1 + local byte=$2 + + local dev + dev=$(sudo rbd map $spec) + cmp <(dd if=/dev/zero bs=1M count=10 | tr \\000 \\$byte) $dev + sudo rbd unmap $dev +} + +function gen_child_specs() { + local i=$1 + + local child_specs="foo/img$i-clone1 foo/img$i-clone2 foo/ns1/img$i-clone1 foo/ns1/img$i-clone2" + if [[ $i -ge 3 ]]; then + child_specs="$child_specs foo/ns2/img$i-clone1 foo/ns2/img$i-clone2" + fi + echo $child_specs +} + +ceph osd pool create foo 12 +rbd pool init foo +ceph osd pool create bar 12 +rbd pool init bar + +ceph osd set-require-min-compat-client nautilus +rbd namespace create foo/ns1 +rbd namespace create foo/ns2 + +SPECS=(foo/img1 foo/img2 foo/ns1/img3 foo/ns1/img4) + +COUNT=1 +for spec in "${SPECS[@]}"; do + if [[ $spec =~ img1|img3 ]]; then + rbd create --size 10 $spec + else + rbd create --size 10 --data-pool bar $spec + fi + do_pwrite $spec 000 $(printf %03d $COUNT) + rbd snap create $spec@snap + COUNT=$((COUNT + 1)) +done +for i in {1..4}; do + for child_spec in $(gen_child_specs $i); do + if [[ $child_spec =~ clone1 
]]; then + rbd clone ${SPECS[i - 1]}@snap $child_spec + else + rbd clone --data-pool bar ${SPECS[i - 1]}@snap $child_spec + fi + do_pwrite $child_spec $(printf %03d $i) $(printf %03d $COUNT) + COUNT=$((COUNT + 1)) + done +done + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img1-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img1-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img1-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img1-clone2)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img2-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img2-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img2-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img2-clone2)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img3-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img3-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img3-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img3-clone2)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img4-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img4-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img4-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img4-clone2)) -eq 3 ]] + +COUNT=1 +for spec in "${SPECS[@]}"; do + do_cmp $spec $(printf %03d $COUNT) + COUNT=$((COUNT + 1)) +done +for i in {1..4}; do + for child_spec in $(gen_child_specs $i); do + do_cmp $child_spec $(printf %03d $COUNT) + COUNT=$((COUNT + 1)) + done +done + +echo OK diff --git a/qa/workunits/rbd/krbd_rxbounce.sh b/qa/workunits/rbd/krbd_rxbounce.sh new file mode 100755 index 000000000..ad00e3f96 --- /dev/null +++ b/qa/workunits/rbd/krbd_rxbounce.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +set -ex + +rbd create --size 256 img + +IMAGE_SIZE=$(rbd info --format=json img | python3 -c 'import sys, json; print(json.load(sys.stdin)["size"])') +OBJECT_SIZE=$(rbd info --format=json img | python3 -c 'import sys, json; print(json.load(sys.stdin)["object_size"])') +NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE)) +[[ $((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]] +OP_SIZE=16384 + +DEV=$(sudo rbd map img) +{ + for ((i = 0; i < $NUM_OBJECTS; i++)); do + echo pwrite -b $OP_SIZE -S $i $((i * OBJECT_SIZE)) $OP_SIZE + done + echo fsync + echo quit +} | xfs_io $DEV +sudo rbd unmap $DEV + +g++ -xc++ -o racereads - -lpthread <<EOF +#include <assert.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/types.h> 
+#include <sys/stat.h> +#include <unistd.h> + +#include <thread> +#include <vector> + +const int object_size = $OBJECT_SIZE; +const int num_objects = $NUM_OBJECTS; +const int read_len = $OP_SIZE; +const int num_reads = 1024; + +int main() { + int fd = open("$DEV", O_DIRECT | O_RDONLY); + assert(fd >= 0); + + void *buf; + int r = posix_memalign(&buf, 512, read_len); + assert(r == 0); + + std::vector<std::thread> threads; + for (int i = 0; i < num_objects; i++) { + threads.emplace_back( + [fd, buf, read_off = static_cast<off_t>(i) * object_size]() { + for (int i = 0; i < num_reads; i++) { + auto len = pread(fd, buf, read_len, read_off); + assert(len == read_len); + } + }); + } + + for (auto &t : threads) { + t.join(); + } +} +EOF + +DEV=$(sudo rbd map -o ms_mode=legacy img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* bad crc/signature') -gt 100 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=legacy,rxbounce img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* bad crc/signature') -eq 0 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=crc img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -gt 100 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=crc,rxbounce img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -eq 0 ]] +sudo rbd unmap $DEV + +# rxbounce is a no-op for secure mode +DEV=$(sudo rbd map -o ms_mode=secure img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -eq 0 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=secure,rxbounce img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -eq 0 ]] +sudo rbd unmap $DEV + +rbd rm img + +echo OK diff --git a/qa/workunits/rbd/krbd_stable_writes.sh b/qa/workunits/rbd/krbd_stable_writes.sh new file mode 100755 index 000000000..d00e5fd04 --- /dev/null +++ b/qa/workunits/rbd/krbd_stable_writes.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash + +set -ex + +function assert_dm() { + local name=$1 + local val=$2 + + local devno + devno=$(sudo dmsetup info -c --noheadings -o Major,Minor $name) + grep -q $val /sys/dev/block/$devno/queue/stable_writes +} + +function dmsetup_reload() { + local name=$1 + + local table + table=$(</dev/stdin) + + sudo dmsetup suspend $name + echo "$table" | sudo dmsetup reload $name + sudo dmsetup resume $name +} + +IMAGE_NAME="stable-writes-test" + +rbd create --size 1 $IMAGE_NAME +DEV=$(sudo rbd map $IMAGE_NAME) + +fallocate -l 1M loopfile +LOOP_DEV=$(sudo losetup -f --show loopfile) + +[[ $(blockdev --getsize64 $DEV) -eq 1048576 ]] +grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes + +rbd resize --size 2 $IMAGE_NAME +[[ $(blockdev --getsize64 $DEV) -eq 2097152 ]] +grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +1024 2048 error +EOF +assert_dm tbl 0 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +1024 2048 error +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +1024 2048 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF 
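+# Whenever any segment of the table is backed by the rbd device, the
+# mapper device must advertise stable_writes=1; tables built only from
+# the loop device stay at 0.  The reload sequences below check that the
+# flag tracks the table as rbd-backed segments are added and removed.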
+assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $LOOP_DEV 0 +1024 2048 linear $DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 error +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +sudo losetup -d $LOOP_DEV +rm loopfile + +sudo rbd unmap $DEV +rbd rm $IMAGE_NAME + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_enumerate.sh b/qa/workunits/rbd/krbd_udev_enumerate.sh new file mode 100755 index 000000000..494f958f8 --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_enumerate.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/41036, but it also +# triggers https://tracker.ceph.com/issues/41404 in some environments. + +set -ex + +function assert_exit_codes() { + declare -a pids=($@) + + for pid in ${pids[@]}; do + wait $pid + done +} + +function run_map() { + declare -a pids + + for i in {1..300}; do + sudo rbd map img$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 301 ]] +} + +function run_unmap_by_dev() { + declare -a pids + + run_map + for i in {0..299}; do + sudo rbd unmap /dev/rbd$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 0 ]] +} + +function run_unmap_by_spec() { + declare -a pids + + run_map + for i in {1..300}; do + sudo rbd unmap img$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 0 ]] +} + +# Can't test with exclusive-lock, don't bother enabling deep-flatten. +# See https://tracker.ceph.com/issues/42492. +for i in {1..300}; do + rbd create --size 1 --image-feature '' img$i +done + +for i in {1..30}; do + echo Iteration $i + run_unmap_by_dev + run_unmap_by_spec +done + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh b/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh new file mode 100755 index 000000000..7c9c53a2f --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/41404, verifying that udev +# events are properly reaped while the image is being (un)mapped in the kernel. +# UDEV_BUF_SIZE is 1M (giving us a 2M socket receive buffer), but modprobe + +# modprobe -r generate ~28M worth of "block" events. + +set -ex + +rbd create --size 1 img + +ceph osd pause +sudo rbd map img & +PID=$! 
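+# The map above is stuck because the OSDs are paused, so the rbd
+# process sits waiting for the udev events for its device.  Loading and
+# then unloading scsi_debug generates far more "block" uevent traffic
+# (~28M, per the header comment) than the 2M socket receive buffer can
+# hold, which is the ENOBUFS condition this test exercises.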
+sudo modprobe scsi_debug max_luns=16 add_host=16 num_parts=1 num_tgts=16 +sudo udevadm settle +sudo modprobe -r scsi_debug +[[ $(rbd showmapped | wc -l) -eq 0 ]] +ceph osd unpause +wait $PID +[[ $(rbd showmapped | wc -l) -eq 2 ]] +sudo rbd unmap img + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_netns.sh b/qa/workunits/rbd/krbd_udev_netns.sh new file mode 100755 index 000000000..e746a682e --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_netns.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +set -ex + +sudo ip netns add ns1 +sudo ip link add veth1-ext type veth peer name veth1-int +sudo ip link set veth1-int netns ns1 + +sudo ip netns exec ns1 ip link set dev lo up +sudo ip netns exec ns1 ip addr add 192.168.1.2/24 dev veth1-int +sudo ip netns exec ns1 ip link set veth1-int up +sudo ip netns exec ns1 ip route add default via 192.168.1.1 + +sudo ip addr add 192.168.1.1/24 dev veth1-ext +sudo ip link set veth1-ext up + +# Enable forwarding between the namespace and the default route +# interface and set up NAT. In case of multiple default routes, +# just pick the first one. +if [[ $(sysctl -n net.ipv4.ip_forward) -eq 0 ]]; then + sudo iptables -P FORWARD DROP + sudo sysctl -w net.ipv4.ip_forward=1 +fi +IFACE="$(ip route list 0.0.0.0/0 | head -n 1 | cut -d ' ' -f 5)" +sudo iptables -A FORWARD -i veth1-ext -o "$IFACE" -j ACCEPT +sudo iptables -A FORWARD -i "$IFACE" -o veth1-ext -j ACCEPT +sudo iptables -t nat -A POSTROUTING -s 192.168.1.2 -o "$IFACE" -j MASQUERADE + +rbd create --size 300 img + +DEV="$(sudo rbd map img)" +mkfs.ext4 "$DEV" +sudo mount "$DEV" /mnt +sudo umount /mnt +sudo rbd unmap "$DEV" + +sudo ip netns exec ns1 bash <<'EOF' + +set -ex + +DEV="/dev/rbd/rbd/img" +[[ ! -e "$DEV" ]] + +# In a network namespace, "rbd map" maps the device and hangs waiting +# for udev add uevents. udev runs as usual (in particular creating the +# symlink which is used here because the device node is never printed), +# but the uevents it sends out never come because they don't cross +# network namespace boundaries. +set +e +timeout 30s rbd map img +RET=$? +set -e +[[ $RET -eq 124 ]] +[[ -L "$DEV" ]] +mkfs.ext4 -F "$DEV" +mount "$DEV" /mnt +umount /mnt + +# In a network namespace, "rbd unmap" unmaps the device and hangs +# waiting for udev remove uevents. udev runs as usual (removing the +# symlink), but the uevents it sends out never come because they don't +# cross network namespace boundaries. +set +e +timeout 30s rbd unmap "$DEV" +RET=$? +set -e +[[ $RET -eq 124 ]] +[[ ! -e "$DEV" ]] + +# Skip waiting for udev uevents with "-o noudev". 
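+# With "-o noudev" the rbd CLI skips waiting for uevents entirely, so
+# map and unmap return promptly inside the namespace and the device
+# node printed by "rbd map" is used directly instead of the
+# udev-managed symlink.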
+DEV="$(rbd map -o noudev img)" +mkfs.ext4 -F "$DEV" +mount "$DEV" /mnt +umount /mnt +rbd unmap -o noudev "$DEV" + +EOF + +rbd rm img + +sudo iptables -t nat -D POSTROUTING -s 192.168.1.2 -o "$IFACE" -j MASQUERADE +sudo iptables -D FORWARD -i "$IFACE" -o veth1-ext -j ACCEPT +sudo iptables -D FORWARD -i veth1-ext -o "$IFACE" -j ACCEPT +sudo ip netns delete ns1 + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_symlinks.sh b/qa/workunits/rbd/krbd_udev_symlinks.sh new file mode 100755 index 000000000..271476527 --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_symlinks.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +set -ex + +SPECS=( +rbd/img1 +rbd/img2 +rbd/img2@snap1 +rbd/img3 +rbd/img3@snap1 +rbd/img3@snap2 +rbd/ns1/img1 +rbd/ns1/img2 +rbd/ns1/img2@snap1 +rbd/ns1/img3 +rbd/ns1/img3@snap1 +rbd/ns1/img3@snap2 +rbd/ns2/img1 +rbd/ns2/img2 +rbd/ns2/img2@snap1 +rbd/ns2/img3 +rbd/ns2/img3@snap1 +rbd/ns2/img3@snap2 +custom/img1 +custom/img1@snap1 +custom/img2 +custom/img2@snap1 +custom/img2@snap2 +custom/img3 +custom/ns1/img1 +custom/ns1/img1@snap1 +custom/ns1/img2 +custom/ns1/img2@snap1 +custom/ns1/img2@snap2 +custom/ns1/img3 +custom/ns2/img1 +custom/ns2/img1@snap1 +custom/ns2/img2 +custom/ns2/img2@snap1 +custom/ns2/img2@snap2 +custom/ns2/img3 +) + +ceph osd pool create custom 8 +rbd pool init custom + +ceph osd set-require-min-compat-client nautilus +rbd namespace create rbd/ns1 +rbd namespace create rbd/ns2 +rbd namespace create custom/ns1 +rbd namespace create custom/ns2 + +# create in order, images before snapshots +for spec in "${SPECS[@]}"; do + if [[ "$spec" =~ snap ]]; then + rbd snap create "$spec" + else + rbd create --size 10 "$spec" + DEV="$(sudo rbd map "$spec")" + sudo sfdisk "$DEV" <<EOF +unit: sectors +${DEV}p1 : start= 2048, size= 2, type=83 +${DEV}p2 : start= 4096, size= 2, type=83 +EOF + sudo rbd unmap "$DEV" + fi +done + +[[ ! -e /dev/rbd ]] + +# map in random order +COUNT=${#SPECS[@]} +read -r -a INDEXES < <(python3 <<EOF +import random +l = list(range($COUNT)) +random.shuffle(l) +print(*l) +EOF +) + +DEVS=() +for idx in "${INDEXES[@]}"; do + DEVS+=("$(sudo rbd map "${SPECS[idx]}")") +done + +[[ $(rbd showmapped | wc -l) -eq $((COUNT + 1)) ]] + +for ((i = 0; i < COUNT; i++)); do + [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}")" == "${DEVS[i]}" ]] + [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}-part1")" == "${DEVS[i]}p1" ]] + [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}-part2")" == "${DEVS[i]}p2" ]] +done + +for idx in "${INDEXES[@]}"; do + sudo rbd unmap "/dev/rbd/${SPECS[idx]}" +done + +[[ ! -e /dev/rbd ]] + +# remove in reverse order, snapshots before images +for ((i = COUNT - 1; i >= 0; i--)); do + if [[ "${SPECS[i]}" =~ snap ]]; then + rbd snap rm "${SPECS[i]}" + else + rbd rm "${SPECS[i]}" + fi +done + +rbd namespace rm custom/ns2 +rbd namespace rm custom/ns1 +rbd namespace rm rbd/ns2 +rbd namespace rm rbd/ns1 + +ceph osd pool delete custom custom --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/rbd/krbd_wac.sh b/qa/workunits/rbd/krbd_wac.sh new file mode 100755 index 000000000..134460409 --- /dev/null +++ b/qa/workunits/rbd/krbd_wac.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +set -ex + +wget http://download.ceph.com/qa/wac.c +gcc -o wac wac.c + +rbd create --size 300 img +DEV=$(sudo rbd map img) + +sudo mkfs.ext4 $DEV +sudo mount $DEV /mnt +set +e +sudo timeout 5m ./wac -l 65536 -n 64 -r /mnt/wac-test +RET=$? 
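+# wac is expected to still be running when the 5 minute timeout fires
+# (exit status 124); the check below treats any other exit status,
+# i.e. wac stopping on its own, as a failure.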
+set -e +[[ $RET -eq 124 ]] +sudo killall -w wac || true # wac forks +sudo umount /mnt + +sudo wipefs -a $DEV +sudo vgcreate vg_img $DEV +sudo lvcreate -L 256M -n lv_img vg_img +udevadm settle +sudo mkfs.ext4 /dev/mapper/vg_img-lv_img +sudo mount /dev/mapper/vg_img-lv_img /mnt +set +e +sudo timeout 5m ./wac -l 65536 -n 64 -r /mnt/wac-test +RET=$? +set -e +[[ $RET -eq 124 ]] +sudo killall -w wac || true # wac forks +sudo umount /mnt +sudo vgremove -f vg_img +sudo pvremove $DEV + +sudo rbd unmap $DEV +rbd rm img + +echo OK diff --git a/qa/workunits/rbd/krbd_watch_errors.sh b/qa/workunits/rbd/krbd_watch_errors.sh new file mode 100755 index 000000000..f650d2a74 --- /dev/null +++ b/qa/workunits/rbd/krbd_watch_errors.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +set -ex +set -o pipefail + +function refresh_loop() { + local dev_id="$1" + + set +x + + local i + for ((i = 1; ; i++)); do + echo 1 | sudo tee "${SYSFS_DIR}/${dev_id}/refresh" > /dev/null + if ((i % 100 == 0)); then + echo "Refreshed ${i} times" + fi + done +} + +readonly SYSFS_DIR="/sys/bus/rbd/devices" +readonly IMAGE_NAME="watch-errors-test" + +rbd create -s 1G --image-feature exclusive-lock "${IMAGE_NAME}" + +# induce a watch error every 30 seconds +dev="$(sudo rbd device map -o osdkeepalive=60 "${IMAGE_NAME}")" +dev_id="${dev#/dev/rbd}" + +# constantly refresh, not just on watch errors +refresh_loop "${dev_id}" & +refresh_pid=$! + +sudo dmesg -C + +# test that none of the above triggers a deadlock with a workload +fio --name test --filename="${dev}" --ioengine=libaio --direct=1 \ + --rw=randwrite --norandommap --randrepeat=0 --bs=512 --iodepth=128 \ + --time_based --runtime=1h --eta=never + +num_errors="$(dmesg | grep -c "rbd${dev_id}: encountered watch error")" +echo "Recorded ${num_errors} watch errors" + +kill "${refresh_pid}" +wait + +sudo rbd device unmap "${dev}" + +if ((num_errors < 60)); then + echo "Too few watch errors" + exit 1 +fi + +echo OK diff --git a/qa/workunits/rbd/luks-encryption.sh b/qa/workunits/rbd/luks-encryption.sh new file mode 100755 index 000000000..5d3cc68cd --- /dev/null +++ b/qa/workunits/rbd/luks-encryption.sh @@ -0,0 +1,217 @@ +#!/usr/bin/env bash +set -ex + +CEPH_ID=${CEPH_ID:-admin} +TMP_FILES="/tmp/passphrase /tmp/passphrase2 /tmp/testdata1 /tmp/testdata2 /tmp/cmpdata" + +_sudo() +{ + local cmd + + if [ `id -u` -eq 0 ] + then + "$@" + return $? + fi + + # Look for the command in the user path. If it fails run it as is, + # supposing it is in sudo path. 
+ cmd=`which $1 2>/dev/null` || cmd=$1 + shift + sudo -nE "${cmd}" "$@" +} + +function drop_caches { + sudo sync + echo 3 | sudo tee /proc/sys/vm/drop_caches +} + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function test_encryption_format() { + local format=$1 + clean_up_cryptsetup + + # format + rbd encryption format testimg $format /tmp/passphrase + drop_caches + + # open encryption with cryptsetup + sudo cryptsetup open $RAW_DEV --type luks cryptsetupdev -d /tmp/passphrase + sudo chmod 666 /dev/mapper/cryptsetupdev + + # open encryption with librbd + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg -t nbd -o encryption-passphrase-file=/tmp/passphrase) + sudo chmod 666 $LIBRBD_DEV + + # write via librbd && compare + dd if=/tmp/testdata1 of=$LIBRBD_DEV oflag=direct bs=1M + dd if=/dev/mapper/cryptsetupdev of=/tmp/cmpdata iflag=direct bs=4M count=4 + cmp -n 16MB /tmp/cmpdata /tmp/testdata1 + + # write via cryptsetup && compare + dd if=/tmp/testdata2 of=/dev/mapper/cryptsetupdev oflag=direct bs=1M + dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=4M count=4 + cmp -n 16MB /tmp/cmpdata /tmp/testdata2 + + # FIXME: encryption-aware flatten/resize misbehave if proxied to + # RAW_DEV mapping (i.e. if RAW_DEV mapping ows the lock) + # (acquire and) release the lock as a side effect + rbd bench --io-type read --io-size 1 --io-threads 1 --io-total 1 testimg + + # check that encryption-aware resize compensates LUKS header overhead + (( $(sudo blockdev --getsize64 $LIBRBD_DEV) < (32 << 20) )) + expect_false rbd resize --size 32M testimg + rbd resize --size 32M --encryption-passphrase-file /tmp/passphrase testimg + (( $(sudo blockdev --getsize64 $LIBRBD_DEV) == (32 << 20) )) + + _sudo rbd device unmap -t nbd $LIBRBD_DEV +} + +function test_clone_encryption() { + clean_up_cryptsetup + + # write 1MB plaintext + dd if=/tmp/testdata1 of=$RAW_DEV oflag=direct bs=1M count=1 + + # clone (luks1) + rbd snap create testimg@snap + rbd snap protect testimg@snap + rbd clone testimg@snap testimg1 + rbd encryption format testimg1 luks1 /tmp/passphrase + + # open encryption with librbd, write one more MB, close + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-format=luks1,encryption-passphrase-file=/tmp/passphrase) + sudo chmod 666 $LIBRBD_DEV + dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=1M count=1 + cmp -n 1MB /tmp/cmpdata /tmp/testdata1 + dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=1 skip=1 oflag=direct bs=1M count=1 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # second clone (luks2) + rbd snap create testimg1@snap + rbd snap protect testimg1@snap + rbd clone testimg1@snap testimg2 + rbd encryption format testimg2 luks2 /tmp/passphrase2 + + # open encryption with librbd, write one more MB, close + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-format=luks2,encryption-passphrase-file=/tmp/passphrase2,encryption-format=luks1,encryption-passphrase-file=/tmp/passphrase) + sudo chmod 666 $LIBRBD_DEV + dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=1M count=2 + cmp -n 2MB /tmp/cmpdata /tmp/testdata1 + dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=2 skip=2 oflag=direct bs=1M count=1 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # flatten + expect_false rbd flatten testimg2 --encryption-format luks1 --encryption-format luks2 --encryption-passphrase-file /tmp/passphrase2 --encryption-passphrase-file /tmp/passphrase + rbd flatten testimg2 --encryption-format luks2 --encryption-format luks1 --encryption-passphrase-file /tmp/passphrase2 
--encryption-passphrase-file /tmp/passphrase + + # verify with cryptsetup + RAW_FLAT_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd) + sudo cryptsetup open $RAW_FLAT_DEV --type luks cryptsetupdev -d /tmp/passphrase2 + sudo chmod 666 /dev/mapper/cryptsetupdev + dd if=/dev/mapper/cryptsetupdev of=/tmp/cmpdata iflag=direct bs=1M count=3 + cmp -n 3MB /tmp/cmpdata /tmp/testdata1 + _sudo rbd device unmap -t nbd $RAW_FLAT_DEV +} + +function test_clone_and_load_with_a_single_passphrase { + local expectedfail=$1 + + # clone and format + rbd snap create testimg@snap + rbd snap protect testimg@snap + rbd clone testimg@snap testimg1 + rbd encryption format testimg1 luks2 /tmp/passphrase2 + + if [ "$expectedfail" = "true" ] + then + expect_false rbd flatten testimg1 --encryption-passphrase-file /tmp/passphrase2 + rbd flatten testimg1 --encryption-passphrase-file /tmp/passphrase2 --encryption-passphrase-file /tmp/passphrase + else + rbd flatten testimg1 --encryption-passphrase-file /tmp/passphrase2 + fi + + rbd remove testimg1 + rbd snap unprotect testimg@snap + rbd snap remove testimg@snap +} + +function test_plaintext_detection { + # 16k LUKS header + sudo cryptsetup -q luksFormat --type luks2 --luks2-metadata-size 16k $RAW_DEV /tmp/passphrase + test_clone_and_load_with_a_single_passphrase true + + # 4m LUKS header + sudo cryptsetup -q luksFormat --type luks2 --luks2-metadata-size 4m $RAW_DEV /tmp/passphrase + test_clone_and_load_with_a_single_passphrase true + + # no luks header + dd if=/dev/zero of=$RAW_DEV oflag=direct bs=4M count=8 + test_clone_and_load_with_a_single_passphrase false +} + +function get_nbd_device_paths { + rbd device list -t nbd | tail -n +2 | egrep "\s+rbd\s+testimg" | awk '{print $5;}' +} + +function clean_up_cryptsetup() { + ls /dev/mapper/cryptsetupdev && sudo cryptsetup close cryptsetupdev || true +} + +function clean_up { + sudo rm -f $TMP_FILES + clean_up_cryptsetup + for device in $(get_nbd_device_paths); do + _sudo rbd device unmap -t nbd $device + done + + rbd remove testimg2 || true + rbd snap unprotect testimg1@snap || true + rbd snap remove testimg1@snap || true + rbd remove testimg1 || true + rbd snap unprotect testimg@snap || true + rbd snap remove testimg@snap || true + rbd remove testimg || true +} + +if [[ $(uname) != "Linux" ]]; then + echo "LUKS encryption tests only supported on Linux" + exit 0 +fi + + +if [[ $(($(ceph-conf --name client.${CEPH_ID} --show-config-value rbd_default_features) & 64)) != 0 ]]; then + echo "LUKS encryption tests not supported alongside image journaling feature" + exit 0 +fi + +clean_up + +trap clean_up INT TERM EXIT + +# generate test data +dd if=/dev/urandom of=/tmp/testdata1 bs=4M count=4 +dd if=/dev/urandom of=/tmp/testdata2 bs=4M count=4 + +# create passphrase files +printf "pass\0word\n" > /tmp/passphrase +printf "\t password2 " > /tmp/passphrase2 + +# create an image +rbd create testimg --size=32M + +# map raw data to nbd device +RAW_DEV=$(_sudo rbd -p rbd map testimg -t nbd) +sudo chmod 666 $RAW_DEV + +test_plaintext_detection + +test_encryption_format luks1 +test_encryption_format luks2 + +test_clone_encryption + +echo OK diff --git a/qa/workunits/rbd/map-snapshot-io.sh b/qa/workunits/rbd/map-snapshot-io.sh new file mode 100755 index 000000000..a69d84829 --- /dev/null +++ b/qa/workunits/rbd/map-snapshot-io.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +# http://tracker.ceph.com/issues/3964 + +set -ex + +rbd create image -s 100 +DEV=$(sudo rbd map image) +dd if=/dev/zero of=$DEV oflag=direct count=10 +rbd snap create image@s1 +dd if=/dev/zero 
of=$DEV oflag=direct count=10 # used to fail +rbd snap rm image@s1 +dd if=/dev/zero of=$DEV oflag=direct count=10 +sudo rbd unmap $DEV +rbd rm image + +echo OK diff --git a/qa/workunits/rbd/map-unmap.sh b/qa/workunits/rbd/map-unmap.sh new file mode 100755 index 000000000..99863849e --- /dev/null +++ b/qa/workunits/rbd/map-unmap.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -ex + +RUN_TIME=300 # approximate duration of run (seconds) + +[ $# -eq 1 ] && RUN_TIME="$1" + +IMAGE_NAME="image-$$" +IMAGE_SIZE="1024" # MB + +function get_time() { + date '+%s' +} + +function times_up() { + local end_time="$1" + + test $(get_time) -ge "${end_time}" +} + +function map_unmap() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + + local dev + dev="$(sudo rbd map "${image_name}")" + sudo rbd unmap "${dev}" +} + +#### Start + +rbd create "${IMAGE_NAME}" --size="${IMAGE_SIZE}" + +COUNT=0 +START_TIME=$(get_time) +END_TIME=$(expr $(get_time) + ${RUN_TIME}) +while ! times_up "${END_TIME}"; do + map_unmap "${IMAGE_NAME}" + COUNT=$(expr $COUNT + 1) +done +ELAPSED=$(expr "$(get_time)" - "${START_TIME}") + +rbd rm "${IMAGE_NAME}" + +echo "${COUNT} iterations completed in ${ELAPSED} seconds" diff --git a/qa/workunits/rbd/merge_diff.sh b/qa/workunits/rbd/merge_diff.sh new file mode 100755 index 000000000..eb8597304 --- /dev/null +++ b/qa/workunits/rbd/merge_diff.sh @@ -0,0 +1,477 @@ +#!/usr/bin/env bash +set -ex + +export RBD_FORCE_ALLOW_V1=1 + +pool=rbd +gen=$pool/gen +out=$pool/out +testno=1 + +mkdir -p merge_diff_test +pushd merge_diff_test + +function expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +function clear_all() +{ + fusermount -u mnt || true + + rbd snap purge --no-progress $gen || true + rbd rm --no-progress $gen || true + rbd snap purge --no-progress $out || true + rbd rm --no-progress $out || true + + rm -rf diffs || true +} + +function rebuild() +{ + clear_all + echo Starting test $testno + ((testno++)) + if [[ "$2" -lt "$1" ]] && [[ "$3" -gt "1" ]]; then + rbd create $gen --size 100 --object-size $1 --stripe-unit $2 --stripe-count $3 --image-format $4 + else + rbd create $gen --size 100 --object-size $1 --image-format $4 + fi + rbd create $out --size 1 --object-size 524288 + mkdir -p mnt diffs + # lttng has atexit handlers that need to be fork/clone aware + LD_PRELOAD=liblttng-ust-fork.so.0 rbd-fuse -p $pool mnt +} + +function write() +{ + dd if=/dev/urandom of=mnt/gen bs=1M conv=notrunc seek=$1 count=$2 +} + +function snap() +{ + rbd snap create $gen@$1 +} + +function resize() +{ + rbd resize --no-progress $gen --size $1 --allow-shrink +} + +function export_diff() +{ + if [ $2 == "head" ]; then + target="$gen" + else + target="$gen@$2" + fi + if [ $1 == "null" ]; then + rbd export-diff --no-progress $target diffs/$1.$2 + else + rbd export-diff --no-progress $target --from-snap $1 diffs/$1.$2 + fi +} + +function merge_diff() +{ + rbd merge-diff diffs/$1.$2 diffs/$2.$3 diffs/$1.$3 +} + +function check() +{ + rbd import-diff --no-progress diffs/$1.$2 $out || return -1 + if [ "$2" == "head" ]; then + sum1=`rbd export $gen - | md5sum` + else + sum1=`rbd export $gen@$2 - | md5sum` + fi + sum2=`rbd export $out - | md5sum` + if [ "$sum1" != "$sum2" ]; then + exit -1 + fi + if [ "$2" != "head" ]; then + rbd snap ls $out | awk '{print $2}' | grep "^$2\$" || return -1 + fi +} + +#test f/t header +rebuild 4194304 4194304 1 2 +write 0 1 +snap a +write 1 1 +export_diff null a +export_diff a head +merge_diff null a head +check null head + +rebuild 4194304 4194304 1 2 +write 0 1 +snap a 
+write 1 1 +snap b +write 2 1 +export_diff null a +export_diff a b +export_diff b head +merge_diff null a b +check null b + +rebuild 4194304 4194304 1 2 +write 0 1 +snap a +write 1 1 +snap b +write 2 1 +export_diff null a +export_diff a b +export_diff b head +merge_diff a b head +check null a +check a head + +rebuild 4194304 4194304 1 2 +write 0 1 +snap a +write 1 1 +snap b +write 2 1 +export_diff null a +export_diff a b +export_diff b head +rbd merge-diff diffs/null.a diffs/a.b - | rbd merge-diff - diffs/b.head - > diffs/null.head +check null head + +#data test +rebuild 4194304 4194304 1 2 +write 4 2 +snap s101 +write 0 3 +write 8 2 +snap s102 +export_diff null s101 +export_diff s101 s102 +merge_diff null s101 s102 +check null s102 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 2 5 +write 8 2 +snap s201 +write 0 2 +write 6 3 +snap s202 +export_diff null s201 +export_diff s201 s202 +merge_diff null s201 s202 +check null s202 + +rebuild 4194304 4194304 1 2 +write 0 4 +write 12 6 +snap s301 +write 0 6 +write 10 5 +write 16 4 +snap s302 +export_diff null s301 +export_diff s301 s302 +merge_diff null s301 s302 +check null s302 + +rebuild 4194304 4194304 1 2 +write 0 12 +write 14 2 +write 18 2 +snap s401 +write 1 2 +write 5 6 +write 13 3 +write 18 2 +snap s402 +export_diff null s401 +export_diff s401 s402 +merge_diff null s401 s402 +check null s402 + +rebuild 4194304 4194304 1 2 +write 2 4 +write 10 12 +write 27 6 +write 36 4 +snap s501 +write 0 24 +write 28 4 +write 36 4 +snap s502 +export_diff null s501 +export_diff s501 s502 +merge_diff null s501 s502 +check null s502 + +rebuild 4194304 4194304 1 2 +write 0 8 +resize 5 +snap r1 +resize 20 +write 12 8 +snap r2 +resize 8 +write 4 4 +snap r3 +export_diff null r1 +export_diff r1 r2 +export_diff r2 r3 +merge_diff null r1 r2 +merge_diff null r2 r3 +check null r3 + +rebuild 4194304 4194304 1 2 +write 0 8 +resize 5 +snap r1 +resize 20 +write 12 8 +snap r2 +resize 8 +write 4 4 +snap r3 +resize 10 +snap r4 +export_diff null r1 +export_diff r1 r2 +export_diff r2 r3 +export_diff r3 r4 +merge_diff null r1 r2 +merge_diff null r2 r3 +merge_diff null r3 r4 +check null r4 + +# merge diff doesn't yet support fancy striping +# rebuild 4194304 65536 8 2 +# write 0 32 +# snap r1 +# write 16 32 +# snap r2 +# export_diff null r1 +# export_diff r1 r2 +# expect_false merge_diff null r1 r2 + +rebuild 4194304 4194304 1 2 +write 0 1 +write 2 1 +write 4 1 +write 6 1 +snap s1 +write 1 1 +write 3 1 +write 5 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 1 1 +write 3 1 +write 5 1 +snap s1 +write 0 1 +write 2 1 +write 4 1 +write 6 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 1 1 +write 7 1 +write 13 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 0 1 +write 6 1 +write 12 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 2 1 +write 8 1 +write 14 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 1 1 +write 7 1 +write 13 1 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 
4194304 4194304 1 2 +write 0 1 +write 6 1 +write 12 1 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 2 1 +write 8 1 +write 14 1 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 2 4 +write 8 4 +write 14 4 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 4 +write 6 4 +write 12 4 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 6 +write 6 6 +write 12 6 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 3 6 +write 9 6 +write 15 6 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 8 +snap s1 +resize 2 +resize 100 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 8 +snap s1 +resize 2 +resize 100 +snap s2 +write 20 2 +snap s3 +export_diff null s1 +export_diff s1 s2 +export_diff s2 s3 +merge_diff s1 s2 s3 +check null s1 +check s1 s3 + +#addme + +clear_all +popd +rm -rf merge_diff_test + +echo OK diff --git a/qa/workunits/rbd/notify_master.sh b/qa/workunits/rbd/notify_master.sh new file mode 100755 index 000000000..99ccd74db --- /dev/null +++ b/qa/workunits/rbd/notify_master.sh @@ -0,0 +1,5 @@ +#!/bin/sh -ex + +relpath=$(dirname $0)/../../../src/test/librbd +python3 $relpath/test_notify.py master +exit 0 diff --git a/qa/workunits/rbd/notify_slave.sh b/qa/workunits/rbd/notify_slave.sh new file mode 100755 index 000000000..7f49a0c7d --- /dev/null +++ b/qa/workunits/rbd/notify_slave.sh @@ -0,0 +1,5 @@ +#!/bin/sh -ex + +relpath=$(dirname $0)/../../../src/test/librbd +python3 $relpath/test_notify.py slave +exit 0 diff --git a/qa/workunits/rbd/permissions.sh b/qa/workunits/rbd/permissions.sh new file mode 100755 index 000000000..f8a9aaa71 --- /dev/null +++ b/qa/workunits/rbd/permissions.sh @@ -0,0 +1,269 @@ +#!/usr/bin/env bash +set -ex + +IMAGE_FEATURES="layering,exclusive-lock,object-map,fast-diff" + +clone_v2_enabled() { + image_spec=$1 + rbd info $image_spec | grep "clone-parent" +} + +create_pools() { + ceph osd pool create images 32 + rbd pool init images + ceph osd pool create volumes 32 + rbd pool init volumes +} + +delete_pools() { + (ceph osd pool delete images images --yes-i-really-really-mean-it || true) >/dev/null 2>&1 + (ceph osd pool delete volumes volumes --yes-i-really-really-mean-it || true) >/dev/null 2>&1 + +} + +recreate_pools() { + delete_pools + create_pools +} + +delete_users() { + (ceph auth del client.volumes || true) >/dev/null 2>&1 + (ceph auth del client.images || true) >/dev/null 2>&1 + + (ceph auth del client.snap_none || true) >/dev/null 2>&1 + (ceph auth del client.snap_all || true) >/dev/null 2>&1 + (ceph auth del client.snap_pool || true) >/dev/null 2>&1 + (ceph auth del client.snap_profile_all || 
true) >/dev/null 2>&1 + (ceph auth del client.snap_profile_pool || true) >/dev/null 2>&1 + + (ceph auth del client.mon_write || true) >/dev/null 2>&1 +} + +create_users() { + ceph auth get-or-create client.volumes \ + mon 'profile rbd' \ + osd 'profile rbd pool=volumes, profile rbd-read-only pool=images' \ + mgr 'profile rbd pool=volumes, profile rbd-read-only pool=images' >> $KEYRING + ceph auth get-or-create client.images mon 'profile rbd' osd 'profile rbd pool=images' >> $KEYRING + + ceph auth get-or-create client.snap_none mon 'allow r' >> $KEYRING + ceph auth get-or-create client.snap_all mon 'allow r' osd 'allow w' >> $KEYRING + ceph auth get-or-create client.snap_pool mon 'allow r' osd 'allow w pool=images' >> $KEYRING + ceph auth get-or-create client.snap_profile_all mon 'allow r' osd 'profile rbd' >> $KEYRING + ceph auth get-or-create client.snap_profile_pool mon 'allow r' osd 'profile rbd pool=images' >> $KEYRING + + ceph auth get-or-create client.mon_write mon 'allow *' >> $KEYRING +} + +expect() { + + set +e + + local expected_ret=$1 + local ret + + shift + cmd=$@ + + eval $cmd + ret=$? + + set -e + + if [[ $ret -ne $expected_ret ]]; then + echo "ERROR: running \'$cmd\': expected $expected_ret got $ret" + return 1 + fi + + return 0 +} + +test_images_access() { + rbd -k $KEYRING --id images create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 images/foo + rbd -k $KEYRING --id images snap create images/foo@snap + rbd -k $KEYRING --id images snap protect images/foo@snap + rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id images snap protect images/foo@snap + rbd -k $KEYRING --id images export images/foo@snap - >/dev/null + expect 16 rbd -k $KEYRING --id images snap rm images/foo@snap + + rbd -k $KEYRING --id volumes clone --image-feature $IMAGE_FEATURES images/foo@snap volumes/child + + if ! 
clone_v2_enabled images/foo; then + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + fi + + expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap + expect 1 rbd -k $KEYRING --id images flatten volumes/child + rbd -k $KEYRING --id volumes flatten volumes/child + expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap + rbd -k $KEYRING --id images snap unprotect images/foo@snap + + expect 39 rbd -k $KEYRING --id images rm images/foo + rbd -k $KEYRING --id images snap rm images/foo@snap + rbd -k $KEYRING --id images rm images/foo + rbd -k $KEYRING --id volumes rm volumes/child +} + +test_volumes_access() { + rbd -k $KEYRING --id images create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 images/foo + rbd -k $KEYRING --id images snap create images/foo@snap + rbd -k $KEYRING --id images snap protect images/foo@snap + + # commands that work with read-only access + rbd -k $KEYRING --id volumes info images/foo@snap + rbd -k $KEYRING --id volumes snap ls images/foo + rbd -k $KEYRING --id volumes export images/foo - >/dev/null + rbd -k $KEYRING --id volumes cp images/foo volumes/foo_copy + rbd -k $KEYRING --id volumes rm volumes/foo_copy + rbd -k $KEYRING --id volumes children images/foo@snap + rbd -k $KEYRING --id volumes lock list images/foo + + # commands that fail with read-only access + expect 1 rbd -k $KEYRING --id volumes resize -s 2 images/foo --allow-shrink + expect 1 rbd -k $KEYRING --id volumes snap create images/foo@2 + expect 1 rbd -k $KEYRING --id volumes snap rollback images/foo@snap + expect 1 rbd -k $KEYRING --id volumes snap remove images/foo@snap + expect 1 rbd -k $KEYRING --id volumes snap purge images/foo + expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap + expect 1 rbd -k $KEYRING --id volumes flatten images/foo + expect 1 rbd -k $KEYRING --id volumes lock add images/foo test + expect 1 rbd -k $KEYRING --id volumes lock remove images/foo test locker + expect 1 rbd -k $KEYRING --id volumes ls rbd + + # create clone and snapshot + rbd -k $KEYRING --id volumes clone --image-feature $IMAGE_FEATURES images/foo@snap volumes/child + rbd -k $KEYRING --id volumes snap create volumes/child@snap1 + rbd -k $KEYRING --id volumes snap protect volumes/child@snap1 + rbd -k $KEYRING --id volumes snap create volumes/child@snap2 + + # make sure original snapshot stays protected + if clone_v2_enabled images/foo; then + rbd -k $KEYRING --id volumes flatten volumes/child + rbd -k $KEYRING --id volumes snap rm volumes/child@snap2 + rbd -k $KEYRING --id volumes snap unprotect volumes/child@snap1 + else + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id volumes flatten volumes/child + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id volumes snap rm volumes/child@snap2 + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + expect 2 rbd -k $KEYRING --id volumes snap rm volumes/child@snap2 + rbd -k $KEYRING --id volumes snap unprotect volumes/child@snap1 + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + fi + + # clean up + rbd -k $KEYRING --id volumes snap rm volumes/child@snap1 + rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id images snap rm images/foo@snap + rbd -k $KEYRING --id images rm images/foo + rbd -k $KEYRING --id volumes rm volumes/child +} + +create_self_managed_snapshot() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + 
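+# RBD image snapshots are built on self-managed pool snapshots, so
+# creating one directly through the ioctx needs OSD write caps on the
+# pool in question; that is exactly what the snap_* test users above
+# do or do not have.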
+with rados.Rados(conffile="", rados_id="${ID}") as cluster: + ioctx = cluster.open_ioctx("${POOL}") + + snap_id = ioctx.create_self_managed_snap() + print ("Created snap id {}".format(snap_id)) +EOF +} + +remove_self_managed_snapshot() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +with rados.Rados(conffile="", rados_id="mon_write") as cluster1, \ + rados.Rados(conffile="", rados_id="${ID}") as cluster2: + ioctx1 = cluster1.open_ioctx("${POOL}") + + snap_id = ioctx1.create_self_managed_snap() + print ("Created snap id {}".format(snap_id)) + + ioctx2 = cluster2.open_ioctx("${POOL}") + + ioctx2.remove_self_managed_snap(snap_id) + print ("Removed snap id {}".format(snap_id)) +EOF +} + +test_remove_self_managed_snapshots() { + # Ensure users cannot create self-managed snapshots w/o permissions + expect 1 create_self_managed_snapshot snap_none images + expect 1 create_self_managed_snapshot snap_none volumes + + create_self_managed_snapshot snap_all images + create_self_managed_snapshot snap_all volumes + + create_self_managed_snapshot snap_pool images + expect 1 create_self_managed_snapshot snap_pool volumes + + create_self_managed_snapshot snap_profile_all images + create_self_managed_snapshot snap_profile_all volumes + + create_self_managed_snapshot snap_profile_pool images + expect 1 create_self_managed_snapshot snap_profile_pool volumes + + # Ensure users cannot delete self-managed snapshots w/o permissions + expect 1 remove_self_managed_snapshot snap_none images + expect 1 remove_self_managed_snapshot snap_none volumes + + remove_self_managed_snapshot snap_all images + remove_self_managed_snapshot snap_all volumes + + remove_self_managed_snapshot snap_pool images + expect 1 remove_self_managed_snapshot snap_pool volumes + + remove_self_managed_snapshot snap_profile_all images + remove_self_managed_snapshot snap_profile_all volumes + + remove_self_managed_snapshot snap_profile_pool images + expect 1 remove_self_managed_snapshot snap_profile_pool volumes +} + +test_rbd_support() { + # read-only commands should work on both pools + ceph -k $KEYRING --id volumes rbd perf image stats volumes + ceph -k $KEYRING --id volumes rbd perf image stats images + + # read/write commands should only work on 'volumes' + rbd -k $KEYRING --id volumes create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 volumes/foo + ceph -k $KEYRING --id volumes rbd task add remove volumes/foo + expect 13 ceph -k $KEYRING --id volumes rbd task add remove images/foo +} + +cleanup() { + rm -f $KEYRING +} + +KEYRING=$(mktemp) +trap cleanup EXIT ERR HUP INT QUIT + +delete_users +create_users + +recreate_pools +test_images_access + +recreate_pools +test_volumes_access + +test_remove_self_managed_snapshots + +test_rbd_support + +delete_pools +delete_users + +echo OK +exit 0 diff --git a/qa/workunits/rbd/qemu-iotests.sh b/qa/workunits/rbd/qemu-iotests.sh new file mode 100755 index 000000000..a2e9e0600 --- /dev/null +++ b/qa/workunits/rbd/qemu-iotests.sh @@ -0,0 +1,47 @@ +#!/bin/sh -ex + +# Run qemu-iotests against rbd. These are block-level tests that go +# through qemu but do not involve running a full vm. Note that these +# require the admin ceph user, as there's no way to pass the ceph user +# to qemu-iotests currently. 
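+# The case numbers below are upstream qemu-iotests ids; they are run
+# against the rbd protocol driver further down by pointing TEST_DIR at
+# the pool and invoking "./check -rbd".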
+ +testlist='001 002 003 004 005 008 009 010 011 021 025 032 033' + +git clone https://github.com/qemu/qemu.git +cd qemu + + +if grep -iqE '(bionic|focal|jammy|platform:el9)' /etc/os-release; then + git checkout v2.11.0 +elif grep -iqE '(xenial|platform:el8)' /etc/os-release; then + git checkout v2.3.0 +else + # use v2.2.0-rc3 (last released version that handles all the tests + git checkout 2528043f1f299e0e88cb026f1ca7c40bbb4e1f80 +fi + +cd tests/qemu-iotests +# qemu-iotests expects a binary called just 'qemu' to be available +if [ -x '/usr/bin/qemu-system-x86_64' ] +then + QEMU='/usr/bin/qemu-system-x86_64' +else + QEMU='/usr/libexec/qemu-kvm' +fi + +# Bionic (v2.11.0) tests expect all tools in current directory +ln -s $QEMU qemu +ln -s /usr/bin/qemu-img +ln -s /usr/bin/qemu-io +ln -s /usr/bin/qemu-nbd + +# this is normally generated by configure, but has nothing but a python +# binary definition, which we don't care about. for some reason it is +# not present on trusty. +touch common.env + +# TEST_DIR is the pool for rbd +TEST_DIR=rbd ./check -rbd $testlist + +cd ../../.. +rm -rf qemu diff --git a/qa/workunits/rbd/qemu_dynamic_features.sh b/qa/workunits/rbd/qemu_dynamic_features.sh new file mode 100755 index 000000000..70e9fbb3c --- /dev/null +++ b/qa/workunits/rbd/qemu_dynamic_features.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +set -x + +if [[ -z "${IMAGE_NAME}" ]]; then + echo image name must be provided + exit 1 +fi + +is_qemu_running() { + rbd status ${IMAGE_NAME} | grep -v "Watchers: none" +} + +wait_for_qemu() { + while ! is_qemu_running ; do + echo "*** Waiting for QEMU" + sleep 30 + done +} + +wait_for_qemu +rbd feature disable ${IMAGE_NAME} journaling +rbd feature disable ${IMAGE_NAME} object-map +rbd feature disable ${IMAGE_NAME} exclusive-lock + +while is_qemu_running ; do + echo "*** Enabling all features" + rbd feature enable ${IMAGE_NAME} exclusive-lock || break + rbd feature enable ${IMAGE_NAME} journaling || break + rbd feature enable ${IMAGE_NAME} object-map || break + if is_qemu_running ; then + sleep 60 + fi + + echo "*** Disabling all features" + rbd feature disable ${IMAGE_NAME} journaling || break + rbd feature disable ${IMAGE_NAME} object-map || break + rbd feature disable ${IMAGE_NAME} exclusive-lock || break + if is_qemu_running ; then + sleep 60 + fi +done + +if is_qemu_running ; then + echo "RBD command failed on alive QEMU" + exit 1 +fi diff --git a/qa/workunits/rbd/qemu_rebuild_object_map.sh b/qa/workunits/rbd/qemu_rebuild_object_map.sh new file mode 100755 index 000000000..2647dcdcd --- /dev/null +++ b/qa/workunits/rbd/qemu_rebuild_object_map.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -ex + +if [[ -z "${IMAGE_NAME}" ]]; then + echo image name must be provided + exit 1 +fi + +is_qemu_running() { + rbd status ${IMAGE_NAME} | grep -v "Watchers: none" +} + +wait_for_qemu() { + while ! 
is_qemu_running ; do + echo "*** Waiting for QEMU" + sleep 30 + done +} + +wait_for_qemu +rbd feature disable ${IMAGE_NAME} journaling || true +rbd feature disable ${IMAGE_NAME} fast-diff || true +rbd feature disable ${IMAGE_NAME} object-map || true +rbd feature disable ${IMAGE_NAME} exclusive-lock || true + +rbd feature enable ${IMAGE_NAME} exclusive-lock +rbd feature enable ${IMAGE_NAME} object-map + +while is_qemu_running ; do + echo "*** Rebuilding object map" + rbd object-map rebuild ${IMAGE_NAME} + + if is_qemu_running ; then + sleep 60 + fi +done + diff --git a/qa/workunits/rbd/qos.sh b/qa/workunits/rbd/qos.sh new file mode 100755 index 000000000..feb1d5144 --- /dev/null +++ b/qa/workunits/rbd/qos.sh @@ -0,0 +1,90 @@ +#!/bin/sh -ex + +POOL=rbd +IMAGE=test$$ +IMAGE_SIZE=1G +TOLERANCE_PRCNT=10 + +rbd_bench() { + local image=$1 + local type=$2 + local total=$3 + local qos_type=$4 + local qos_limit=$5 + local iops_var_name=$6 + local bps_var_name=$7 + local timeout=$8 + local timeout_cmd="" + + if [ -n "${timeout}" ]; then + timeout_cmd="timeout --preserve-status ${timeout}" + fi + + # parse `rbd bench` output for string like this: + # elapsed: 25 ops: 2560 ops/sec: 100.08 bytes/sec: 409.13 MiB + iops_bps=$(${timeout_cmd} rbd bench "${image}" \ + --io-type ${type} --io-size 4K \ + --io-total ${total} --rbd-cache=false \ + --rbd_qos_${qos_type}_limit ${qos_limit} | + awk '/elapsed:.* GiB/ {print int($6) ":" int($8) * 1024 * 1024 * 1024} + /elapsed:.* MiB/ {print int($6) ":" int($8) * 1024 * 1024} + /elapsed:.* KiB/ {print int($6) ":" int($8) * 1024} + /elapsed:.* B/ {print int($6) ":" int($8)}') + eval ${iops_var_name}=${iops_bps%:*} + eval ${bps_var_name}=${iops_bps#*:} +} + +rbd create "${POOL}/${IMAGE}" -s ${IMAGE_SIZE} +rbd bench "${POOL}/${IMAGE}" --io-type write --io-size 4M --io-total ${IMAGE_SIZE} + +rbd_bench "${POOL}/${IMAGE}" write ${IMAGE_SIZE} iops 0 iops bps 60 +iops_unlimited=$iops +bps_unlimited=$bps + +test "${iops_unlimited}" -ge 20 || exit 0 + +io_total=$((bps_unlimited * 30)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} write_iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} write_bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops 0 iops bps +iops_unlimited=$iops +bps_unlimited=$bps + +test "${iops_unlimited}" -ge 20 || exit 0 + +io_total=$((bps_unlimited * 30)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} read_iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} read_bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +# test a config 
override is applied +rbd config image set "${POOL}/${IMAGE}" rbd_qos_iops_limit $((iops_unlimited / 4)) +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 4 * (100 + TOLERANCE_PRCNT) / 100)) +rbd config image remove "${POOL}/${IMAGE}" rbd_qos_iops_limit +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd rm "${POOL}/${IMAGE}" + +echo OK diff --git a/qa/workunits/rbd/rbd-ggate.sh b/qa/workunits/rbd/rbd-ggate.sh new file mode 100755 index 000000000..1bf89da38 --- /dev/null +++ b/qa/workunits/rbd/rbd-ggate.sh @@ -0,0 +1,239 @@ +#!/bin/sh -ex + +POOL=testrbdggate$$ +NS=ns +IMAGE=test +SIZE=64 +DATA= +DEV= + +if which xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif which xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" + exit 1 +fi + +if [ `uname -K` -ge 1200078 ] ; then + RBD_GGATE_RESIZE_SUPPORTED=1 +fi + +_sudo() +{ + local cmd + + if [ `id -u` -eq 0 ] + then + "$@" + return $? + fi + + # Look for the command in the user path. If it fails run it as is, + # supposing it is in sudo path. + cmd=`which $1 2>/dev/null` || cmd=$1 + shift + sudo -nE "${cmd}" "$@" +} + +check_geom_gate() +{ + # See if geom_date is load, or can be loaded. + # Otherwise the tests can not run + if ! kldstat -q -n geom_gate ; then + # See if we can load it + if ! _sudo kldload geom_gate ; then + echo Not able to load geom_gate + echo check /var/log/messages as to why + exit 1 + fi + fi +} + +setup() +{ + local ns x + + if [ -e CMakeCache.txt ]; then + # running under cmake build dir + + CEPH_SRC=$(readlink -f $(dirname $0)/../../../src) + CEPH_ROOT=${PWD} + CEPH_BIN=${CEPH_ROOT}/bin + + export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH} + export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind:${CEPH_ROOT}/lib/cython_modules/lib.3 + PATH=${CEPH_BIN}:${PATH} + fi + + _sudo echo test sudo + check_geom_gate + + trap cleanup INT TERM EXIT + TEMPDIR=`mktemp -d` + DATA=${TEMPDIR}/data + dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} + ceph osd pool create ${POOL} 32 + + rbd namespace create ${POOL}/${NS} + for ns in '' ${NS}; do + rbd --dest-pool ${POOL} --dest-namespace "${ns}" --no-progress import \ + ${DATA} ${IMAGE} + done +} + +cleanup() +{ + local ns s + + set +e + rm -Rf ${TEMPDIR} + if [ -n "${DEV}" ] + then + _sudo rbd-ggate unmap ${DEV} + fi + + ceph osd pool delete ${POOL} ${POOL} --yes-i-really-really-mean-it +} + +expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +# +# main +# + +setup + +echo exit status test +expect_false rbd-ggate +expect_false rbd-ggate INVALIDCMD +if [ `id -u` -ne 0 ] +then + expect_false rbd-ggate map ${IMAGE} +fi +expect_false _sudo rbd-ggate map INVALIDIMAGE + +echo map test using the first unused device +DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}` +rbd-ggate list | grep " ${DEV} *$" + +echo map test specifying the device +expect_false _sudo rbd-ggate --device ${DEV} map ${POOL}/${IMAGE} +dev1=${DEV} +_sudo rbd-ggate unmap ${DEV} +rbd-ggate list | expect_false grep " ${DEV} *$" +DEV= +# XXX: race possible when the device is reused by other process +DEV=`_sudo rbd-ggate --device ${dev1} map ${POOL}/${IMAGE}` +[ "${DEV}" = "${dev1}" ] +rbd-ggate list | grep " ${DEV} *$" + +echo list format test +expect_false _sudo rbd-ggate --format INVALID list +rbd-ggate --format json --pretty-format list +rbd-ggate --format xml list + +echo 
read test +[ "`dd if=${DATA} bs=1M | md5`" = "`_sudo dd if=${DEV} bs=1M | md5`" ] + +echo write test +dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} +_sudo dd if=${DATA} of=${DEV} bs=1M +_sudo sync +[ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + +echo trim test +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +_sudo newfs -E ${DEV} +_sudo sync +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -lt "${provisioned}" ] + +echo resize test +devname=$(basename ${DEV}) +size=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') +test -n "${size}" +rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M +rbd info ${POOL}/${IMAGE} +if [ -z "$RBD_GGATE_RESIZE_SUPPORTED" ]; then + # when resizing is not supported: + # resizing the underlying image for a GEOM ggate will stop the + # ggate process servicing the device. So we can resize and test + # the disappearance of the device + rbd-ggate list | expect_false grep " ${DEV} *$" +else + rbd-ggate list | grep " ${DEV} *$" + size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') + test -n "${size2}" + test ${size2} -eq $((size * 2)) + dd if=/dev/urandom of=${DATA} bs=1M count=$((SIZE * 2)) + _sudo dd if=${DATA} of=${DEV} bs=1M + _sudo sync + [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M + rbd info ${POOL}/${IMAGE} + size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') + test -n "${size2}" + test ${size2} -eq ${size} + truncate -s ${SIZE}M ${DATA} + [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + _sudo rbd-ggate unmap ${DEV} +fi +DEV= + +echo read-only option test +DEV=`_sudo rbd-ggate map --read-only ${POOL}/${IMAGE}` +devname=$(basename ${DEV}) +rbd-ggate list | grep " ${DEV} *$" +access=$(geom gate list ${devname} | awk '$1 == "access:" {print $2}') +test "${access}" = "read-only" +_sudo dd if=${DEV} of=/dev/null bs=1M +expect_false _sudo dd if=${DATA} of=${DEV} bs=1M +_sudo rbd-ggate unmap ${DEV} + +echo exclusive option test +DEV=`_sudo rbd-ggate map --exclusive ${POOL}/${IMAGE}` +rbd-ggate list | grep " ${DEV} *$" +_sudo dd if=${DATA} of=${DEV} bs=1M +_sudo sync +expect_false timeout 10 \ + rbd -p ${POOL} bench ${IMAGE} --io-type=write --io-size=1024 --io-total=1024 +_sudo rbd-ggate unmap ${DEV} +DEV= +rbd bench -p ${POOL} ${IMAGE} --io-type=write --io-size=1024 --io-total=1024 + +echo unmap by image name test +DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}` +rbd-ggate list | grep " ${DEV} *$" +_sudo rbd-ggate unmap "${POOL}/${IMAGE}" +rbd-ggate list | expect_false grep " ${DEV} *$" +DEV= + +echo map/unmap snap test +rbd snap create ${POOL}/${IMAGE}@snap +DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}@snap` +rbd-ggate list | grep " ${DEV} *$" +_sudo rbd-ggate unmap "${POOL}/${IMAGE}@snap" +rbd-ggate list | expect_false grep " ${DEV} *$" +DEV= + +echo map/unmap namespace test +rbd snap create ${POOL}/${NS}/${IMAGE}@snap +DEV=`_sudo rbd-ggate map ${POOL}/${NS}/${IMAGE}@snap` +rbd-ggate list | grep " 
${DEV} *$" +_sudo rbd-ggate unmap "${POOL}/${NS}/${IMAGE}@snap" +rbd-ggate list | expect_false grep "${DEV} $" +DEV= + +echo OK diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh new file mode 100755 index 000000000..bc89e9be5 --- /dev/null +++ b/qa/workunits/rbd/rbd-nbd.sh @@ -0,0 +1,500 @@ +#!/usr/bin/env bash +set -ex + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +POOL=rbd +ANOTHER_POOL=new_default_pool$$ +NS=ns +IMAGE=testrbdnbd$$ +SIZE=64 +DATA= +DEV= + +_sudo() +{ + local cmd + + if [ `id -u` -eq 0 ] + then + "$@" + return $? + fi + + # Look for the command in the user path. If it fails run it as is, + # supposing it is in sudo path. + cmd=`which $1 2>/dev/null` || cmd=$1 + shift + sudo -nE "${cmd}" "$@" +} + +setup() +{ + local ns x + + if [ -e CMakeCache.txt ]; then + # running under cmake build dir + + CEPH_SRC=$(readlink -f $(dirname $0)/../../../src) + CEPH_ROOT=${PWD} + CEPH_BIN=${CEPH_ROOT}/bin + + export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH} + export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind:${CEPH_ROOT}/lib/cython_modules/lib.3 + PATH=${CEPH_BIN}:${PATH} + fi + + _sudo echo test sudo + + trap cleanup INT TERM EXIT + TEMPDIR=`mktemp -d` + DATA=${TEMPDIR}/data + dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} + + rbd namespace create ${POOL}/${NS} + + for ns in '' ${NS}; do + rbd --dest-pool ${POOL} --dest-namespace "${ns}" --no-progress import \ + ${DATA} ${IMAGE} + done + + # create another pool + ceph osd pool create ${ANOTHER_POOL} 8 + rbd pool init ${ANOTHER_POOL} +} + +function cleanup() +{ + local ns s + + set +e + + mount | fgrep ${TEMPDIR}/mnt && _sudo umount -f ${TEMPDIR}/mnt + + rm -Rf ${TEMPDIR} + if [ -n "${DEV}" ] + then + _sudo rbd device --device-type nbd unmap ${DEV} + fi + + for ns in '' ${NS}; do + if rbd -p ${POOL} --namespace "${ns}" status ${IMAGE} 2>/dev/null; then + for s in 0.5 1 2 4 8 16 32; do + sleep $s + rbd -p ${POOL} --namespace "${ns}" status ${IMAGE} | + grep 'Watchers: none' && break + done + rbd -p ${POOL} --namespace "${ns}" snap purge ${IMAGE} + rbd -p ${POOL} --namespace "${ns}" remove ${IMAGE} + fi + done + rbd namespace remove ${POOL}/${NS} + + # cleanup/reset default pool + rbd config global rm global rbd_default_pool + ceph osd pool delete ${ANOTHER_POOL} ${ANOTHER_POOL} --yes-i-really-really-mean-it +} + +function expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +function get_pid() +{ + local pool=$1 + local ns=$2 + + PID=$(rbd device --device-type nbd --format xml list | $XMLSTARLET sel -t -v \ + "//devices/device[pool='${pool}'][namespace='${ns}'][image='${IMAGE}'][device='${DEV}']/id") + test -n "${PID}" || return 1 + ps -p ${PID} -C rbd-nbd +} + +unmap_device() +{ + local args=$1 + local pid=$2 + + _sudo rbd device --device-type nbd unmap ${args} + rbd device --device-type nbd list | expect_false grep "^${pid}\\b" || return 1 + ps -C rbd-nbd | expect_false grep "^ *${pid}\\b" || return 1 + + # workaround possible race between unmap and following map + sleep 0.5 +} + +# +# main +# + +setup + +# exit status test +expect_false rbd-nbd +expect_false rbd-nbd INVALIDCMD +if [ `id -u` -ne 0 ] +then + expect_false rbd device --device-type nbd map ${IMAGE} +fi +expect_false _sudo rbd device --device-type nbd map INVALIDIMAGE +expect_false _sudo rbd-nbd --device INVALIDDEV map ${IMAGE} + +# list format test +expect_false rbd device --device-type nbd --format INVALID list +rbd device --device-type nbd --format json --pretty-format list +rbd device --device-type nbd --format xml 
list + +# map test using the first unused device +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +# map test specifying the device +expect_false _sudo rbd-nbd --device ${DEV} map ${POOL}/${IMAGE} +dev1=${DEV} +unmap_device ${DEV} ${PID} +DEV= +# XXX: race possible when the device is reused by other process +DEV=`_sudo rbd-nbd --device ${dev1} map ${POOL}/${IMAGE}` +[ "${DEV}" = "${dev1}" ] +rbd device --device-type nbd list | grep "${IMAGE}" +get_pid ${POOL} + +# read test +[ "`dd if=${DATA} bs=1M | md5sum`" = "`_sudo dd if=${DEV} bs=1M | md5sum`" ] + +# write test +dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} +_sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct +[ "`dd if=${DATA} bs=1M | md5sum`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5sum`" ] +unmap_device ${DEV} ${PID} + +# notrim test +DEV=`_sudo rbd device --device-type nbd --options notrim map ${POOL}/${IMAGE}` +get_pid ${POOL} +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +# should fail discard as at time of mapping notrim was used +expect_false _sudo blkdiscard ${DEV} +sync +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +unmap_device ${DEV} ${PID} + +# trim test +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +# should honor discard as at time of mapping trim was considered by default +_sudo blkdiscard ${DEV} +sync +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -lt "${provisioned}" ] + +# resize test +devname=$(basename ${DEV}) +blocks=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) +test -n "${blocks}" +rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M +rbd info ${POOL}/${IMAGE} +blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) +test -n "${blocks2}" +test ${blocks2} -eq $((blocks * 2)) +rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M +blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) +test -n "${blocks2}" +test ${blocks2} -eq ${blocks} + +# read-only option test +unmap_device ${DEV} ${PID} +DEV=`_sudo rbd --device-type nbd map --read-only ${POOL}/${IMAGE}` +PID=$(rbd device --device-type nbd list | awk -v pool=${POOL} -v img=${IMAGE} -v dev=${DEV} \ + '$2 == pool && $3 == img && $5 == dev {print $1}') +test -n "${PID}" +ps -p ${PID} -C rbd-nbd + +_sudo dd if=${DEV} of=/dev/null bs=1M +expect_false _sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct +unmap_device ${DEV} ${PID} + +# exclusive option test +DEV=`_sudo rbd --device-type nbd map --exclusive ${POOL}/${IMAGE}` +get_pid ${POOL} + +_sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct +expect_false timeout 10 \ + 
rbd bench ${IMAGE} --io-type write --io-size=1024 --io-total=1024 +unmap_device ${DEV} ${PID} +DEV= +rbd bench ${IMAGE} --io-type write --io-size=1024 --io-total=1024 + +# unmap by image name test +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +unmap_device ${IMAGE} ${PID} +DEV= + +# map/unmap snap test +rbd snap create ${POOL}/${IMAGE}@snap +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}@snap` +get_pid ${POOL} +unmap_device "${IMAGE}@snap" ${PID} +DEV= + +# map/unmap snap test with --snap-id +SNAPID=`rbd snap ls ${POOL}/${IMAGE} | awk '$2 == "snap" {print $1}'` +DEV=`_sudo rbd device --device-type nbd map --snap-id ${SNAPID} ${POOL}/${IMAGE}` +get_pid ${POOL} +unmap_device "--snap-id ${SNAPID} ${IMAGE}" ${PID} +DEV= + +# map/unmap namespace test +rbd snap create ${POOL}/${NS}/${IMAGE}@snap +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${NS}/${IMAGE}@snap` +get_pid ${POOL} ${NS} +unmap_device "${POOL}/${NS}/${IMAGE}@snap" ${PID} +DEV= + +# map/unmap namespace test with --snap-id +SNAPID=`rbd snap ls ${POOL}/${NS}/${IMAGE} | awk '$2 == "snap" {print $1}'` +DEV=`_sudo rbd device --device-type nbd map --snap-id ${SNAPID} ${POOL}/${NS}/${IMAGE}` +get_pid ${POOL} ${NS} +unmap_device "--snap-id ${SNAPID} ${POOL}/${NS}/${IMAGE}" ${PID} +DEV= + +# map/unmap namespace using options test +DEV=`_sudo rbd device --device-type nbd map --pool ${POOL} --namespace ${NS} --image ${IMAGE}` +get_pid ${POOL} ${NS} +unmap_device "--pool ${POOL} --namespace ${NS} --image ${IMAGE}" ${PID} +DEV=`_sudo rbd device --device-type nbd map --pool ${POOL} --namespace ${NS} --image ${IMAGE} --snap snap` +get_pid ${POOL} ${NS} +unmap_device "--pool ${POOL} --namespace ${NS} --image ${IMAGE} --snap snap" ${PID} +DEV= + +# unmap by image name test 2 +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +pid=$PID +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${NS}/${IMAGE}` +get_pid ${POOL} ${NS} +unmap_device ${POOL}/${NS}/${IMAGE} ${PID} +DEV= +unmap_device ${POOL}/${IMAGE} ${pid} + +# map/unmap test with just image name and expect image to come from default pool +if [ "${POOL}" = "rbd" ];then + DEV=`_sudo rbd device --device-type nbd map ${IMAGE}` + get_pid ${POOL} + unmap_device ${IMAGE} ${PID} + DEV= +fi + +# map/unmap test with just image name after changing default pool +rbd config global set global rbd_default_pool ${ANOTHER_POOL} +rbd create --size 10M ${IMAGE} +DEV=`_sudo rbd device --device-type nbd map ${IMAGE}` +get_pid ${ANOTHER_POOL} +unmap_device ${IMAGE} ${PID} +DEV= + +# reset +rbd config global rm global rbd_default_pool + +# auto unmap test +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +_sudo kill ${PID} +for i in `seq 10`; do + rbd device --device-type nbd list | expect_false grep "^${PID} *${POOL} *${IMAGE}" && break + sleep 1 +done +rbd device --device-type nbd list | expect_false grep "^${PID} *${POOL} *${IMAGE}" + +# quiesce test +QUIESCE_HOOK=${TEMPDIR}/quiesce.sh +DEV=`_sudo rbd device --device-type nbd map --quiesce --quiesce-hook ${QUIESCE_HOOK} ${POOL}/${IMAGE}` +get_pid ${POOL} + +# test it fails if the hook does not exists +test ! 
-e ${QUIESCE_HOOK} +expect_false rbd snap create ${POOL}/${IMAGE}@quiesce1 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct + +# test the hook is executed +touch ${QUIESCE_HOOK} +chmod +x ${QUIESCE_HOOK} +cat > ${QUIESCE_HOOK} <<EOF +#/bin/sh +echo "test the hook is executed" >&2 +echo \$1 > ${TEMPDIR}/\$2 +EOF +rbd snap create ${POOL}/${IMAGE}@quiesce1 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct +test "$(cat ${TEMPDIR}/quiesce)" = ${DEV} +test "$(cat ${TEMPDIR}/unquiesce)" = ${DEV} + +# test snap create fails if the hook fails +touch ${QUIESCE_HOOK} +chmod +x ${QUIESCE_HOOK} +cat > ${QUIESCE_HOOK} <<EOF +#/bin/sh +echo "test snap create fails if the hook fails" >&2 +exit 22 +EOF +expect_false rbd snap create ${POOL}/${IMAGE}@quiesce2 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct + +# test the hook is slow +cat > ${QUIESCE_HOOK} <<EOF +#/bin/sh +echo "test the hook is slow" >&2 +sleep 7 +EOF +rbd snap create ${POOL}/${IMAGE}@quiesce2 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct + +# test rbd-nbd_quiesce hook that comes with distribution +unmap_device ${DEV} ${PID} +LOG_FILE=${TEMPDIR}/rbd-nbd.log +if [ -n "${CEPH_SRC}" ]; then + QUIESCE_HOOK=${CEPH_SRC}/tools/rbd_nbd/rbd-nbd_quiesce + DEV=`_sudo rbd device --device-type nbd map --quiesce --quiesce-hook ${QUIESCE_HOOK} \ + ${POOL}/${IMAGE} --log-file=${LOG_FILE}` +else + DEV=`_sudo rbd device --device-type nbd map --quiesce ${POOL}/${IMAGE} --log-file=${LOG_FILE}` +fi +get_pid ${POOL} +_sudo mkfs ${DEV} +mkdir ${TEMPDIR}/mnt +_sudo mount ${DEV} ${TEMPDIR}/mnt +rbd snap create ${POOL}/${IMAGE}@quiesce3 +_sudo dd if=${DATA} of=${TEMPDIR}/mnt/test bs=1M count=1 oflag=direct +_sudo umount ${TEMPDIR}/mnt +unmap_device ${DEV} ${PID} +DEV= +cat ${LOG_FILE} +expect_false grep 'quiesce failed' ${LOG_FILE} + +# test detach/attach +OUT=`_sudo rbd device --device-type nbd --options try-netlink,show-cookie map ${POOL}/${IMAGE}` +read DEV COOKIE <<< "${OUT}" +get_pid ${POOL} +_sudo mount ${DEV} ${TEMPDIR}/mnt +_sudo rbd device detach ${POOL}/${IMAGE} --device-type nbd +expect_false get_pid ${POOL} +expect_false _sudo rbd device attach --device ${DEV} ${POOL}/${IMAGE} --device-type nbd +if [ -n "${COOKIE}" ]; then + _sudo rbd device attach --device ${DEV} --cookie ${COOKIE} ${POOL}/${IMAGE} --device-type nbd +else + _sudo rbd device attach --device ${DEV} ${POOL}/${IMAGE} --device-type nbd --force +fi +get_pid ${POOL} +_sudo rbd device detach ${DEV} --device-type nbd +expect_false get_pid ${POOL} +if [ -n "${COOKIE}" ]; then + _sudo rbd device attach --device ${DEV} --cookie ${COOKIE} ${POOL}/${IMAGE} --device-type nbd +else + _sudo rbd device attach --device ${DEV} ${POOL}/${IMAGE} --device-type nbd --force +fi +get_pid ${POOL} +ls ${TEMPDIR}/mnt/ +dd if=${TEMPDIR}/mnt/test of=/dev/null bs=1M count=1 +_sudo dd if=${DATA} of=${TEMPDIR}/mnt/test1 bs=1M count=1 oflag=direct +_sudo umount ${TEMPDIR}/mnt +unmap_device ${DEV} ${PID} +# if kernel supports cookies +if [ -n "${COOKIE}" ]; then + OUT=`_sudo rbd device --device-type nbd --show-cookie --cookie "abc de" --options try-netlink map ${POOL}/${IMAGE}` + read DEV ANOTHER_COOKIE <<< "${OUT}" + get_pid ${POOL} + test "${ANOTHER_COOKIE}" = "abc de" + unmap_device ${DEV} ${PID} +fi +DEV= + +# test detach/attach with --snap-id +SNAPID=`rbd snap ls ${POOL}/${IMAGE} | awk '$2 == "snap" {print $1}'` +OUT=`_sudo rbd device --device-type nbd --options try-netlink,show-cookie map --snap-id ${SNAPID} ${POOL}/${IMAGE}` +read DEV COOKIE <<< "${OUT}" +get_pid ${POOL} +_sudo 
rbd device detach ${POOL}/${IMAGE} --snap-id ${SNAPID} --device-type nbd +expect_false get_pid ${POOL} +expect_false _sudo rbd device attach --device ${DEV} --snap-id ${SNAPID} ${POOL}/${IMAGE} --device-type nbd +if [ -n "${COOKIE}" ]; then + _sudo rbd device attach --device ${DEV} --cookie ${COOKIE} --snap-id ${SNAPID} ${POOL}/${IMAGE} --device-type nbd +else + _sudo rbd device attach --device ${DEV} --snap-id ${SNAPID} ${POOL}/${IMAGE} --device-type nbd --force +fi +get_pid ${POOL} +_sudo rbd device detach ${DEV} --device-type nbd +expect_false get_pid ${POOL} +DEV= + +# test discard granularity with journaling +rbd config image set ${POOL}/${IMAGE} rbd_discard_granularity_bytes 4096 +rbd feature enable ${POOL}/${IMAGE} journaling +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +# since a discard will now be pruned to only whole blocks (0..4095, 4096..8191) +# let us test all the cases around those alignments. 512 is the smallest +# possible block blkdiscard allows us to use. Thus the test checks +# 512 before, on the alignment, 512 after. +_sudo blkdiscard --offset 0 --length $((4096-512)) ${DEV} +_sudo blkdiscard --offset 0 --length 4096 ${DEV} +_sudo blkdiscard --offset 0 --length $((4096+512)) ${DEV} +_sudo blkdiscard --offset 512 --length $((8192-1024)) ${DEV} +_sudo blkdiscard --offset 512 --length $((8192-512)) ${DEV} +_sudo blkdiscard --offset 512 --length 8192 ${DEV} +# wait for commit log to be empty, 10 seconds should be well enough +tries=0 +queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'` +while [ ${tries} -lt 10 ] && [ ${queue_length} -gt 0 ]; do + rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose + sleep 1 + queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'` + tries=$((tries+1)) +done +[ ${queue_length} -eq 0 ] +unmap_device ${DEV} ${PID} +DEV= +rbd feature disable ${POOL}/${IMAGE} journaling +rbd config image rm ${POOL}/${IMAGE} rbd_discard_granularity_bytes + +# test that disabling a feature so that the op is proxied to rbd-nbd +# (arranged here by blkdiscard before "rbd feature disable") doesn't hang +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +rbd feature enable ${POOL}/${IMAGE} journaling +_sudo blkdiscard --offset 0 --length 4096 ${DEV} +rbd feature disable ${POOL}/${IMAGE} journaling +unmap_device ${DEV} ${PID} +DEV= + +# test that rbd_op_threads setting takes effect +EXPECTED=`ceph-conf --show-config-value librados_thread_count` +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +ACTUAL=`ps -p ${PID} -T | grep -c io_context_pool` +[ ${ACTUAL} -eq ${EXPECTED} ] +unmap_device ${DEV} ${PID} +EXPECTED=$((EXPECTED * 3 + 1)) +DEV=`_sudo rbd device --device-type nbd --rbd-op-threads ${EXPECTED} map ${POOL}/${IMAGE}` +get_pid ${POOL} +ACTUAL=`ps -p ${PID} -T | grep -c io_context_pool` +[ ${ACTUAL} -eq ${EXPECTED} ] +unmap_device ${DEV} ${PID} +DEV= + +echo OK diff --git a/qa/workunits/rbd/rbd_groups.sh b/qa/workunits/rbd/rbd_groups.sh new file mode 100755 index 000000000..a32618484 --- /dev/null +++ b/qa/workunits/rbd/rbd_groups.sh @@ -0,0 +1,258 @@ +#!/usr/bin/env bash + +set -ex + +# +# rbd_consistency_groups.sh - test consistency groups cli commands +# + +# +# Functions +# + +create_group() +{ + local group_name=$1 + + rbd group create $group_name +} + +list_groups() +{ + rbd group list +} + +check_group_exists() +{ + local group_name=$1 + list_groups | 
grep $group_name +} + +remove_group() +{ + local group_name=$1 + + rbd group remove $group_name +} + +rename_group() +{ + local src_name=$1 + local dest_name=$2 + + rbd group rename $src_name $dest_name +} + +check_group_does_not_exist() +{ + local group_name=$1 + for v in $(list_groups); do + if [ "$v" == "$group_name" ]; then + return 1 + fi + done + return 0 +} + +create_image() +{ + local image_name=$1 + rbd create --size 10M $image_name +} + +remove_image() +{ + local image_name=$1 + rbd remove $image_name +} + +add_image_to_group() +{ + local image_name=$1 + local group_name=$2 + rbd group image add $group_name $image_name +} + +remove_image_from_group() +{ + local image_name=$1 + local group_name=$2 + rbd group image remove $group_name $image_name +} + +check_image_in_group() +{ + local image_name=$1 + local group_name=$2 + for v in $(rbd group image list $group_name); do + local vtrimmed=${v#*/} + if [ "$vtrimmed" = "$image_name" ]; then + return 0 + fi + done + return 1 +} + +check_image_not_in_group() +{ + local image_name=$1 + local group_name=$2 + for v in $(rbd group image list $group_name); do + local vtrimmed=${v#*/} + if [ "$vtrimmed" = "$image_name" ]; then + return 1 + fi + done + return 0 +} + +create_snapshot() +{ + local group_name=$1 + local snap_name=$2 + rbd group snap create $group_name@$snap_name +} + +create_snapshots() +{ + local group_name=$1 + local snap_name=$2 + local snap_count=$3 + for i in `seq 1 $snap_count`; do + rbd group snap create $group_name@$snap_name$i + done +} + +remove_snapshot() +{ + local group_name=$1 + local snap_name=$2 + rbd group snap remove $group_name@$snap_name +} + +remove_snapshots() +{ + local group_name=$1 + local snap_name=$2 + local snap_count=$3 + for i in `seq 1 $snap_count`; do + rbd group snap remove $group_name@$snap_name$i + done +} + +rename_snapshot() +{ + local group_name=$1 + local snap_name=$2 + local new_snap_name=$3 + rbd group snap rename $group_name@$snap_name $new_snap_name +} + +list_snapshots() +{ + local group_name=$1 + rbd group snap list $group_name +} + +rollback_snapshot() +{ + local group_name=$1 + local snap_name=$2 + rbd group snap rollback $group_name@$snap_name +} + +check_snapshot_in_group() +{ + local group_name=$1 + local snap_name=$2 + list_snapshots $group_name | grep $snap_name +} + +check_snapshots_count_in_group() +{ + local group_name=$1 + local snap_name=$2 + local expected_count=$3 + local actual_count + actual_count=$(list_snapshots $group_name | grep -c $snap_name) + (( actual_count == expected_count )) +} + +check_snapshot_not_in_group() +{ + local group_name=$1 + local snap_name=$2 + for v in $(list_snapshots $group_name | awk '{print $1}'); do + if [ "$v" = "$snap_name" ]; then + return 1 + fi + done + return 0 +} + +echo "TEST: create remove consistency group" +group="test_consistency_group" +new_group="test_new_consistency_group" +create_group $group +check_group_exists $group +rename_group $group $new_group +check_group_exists $new_group +remove_group $new_group +check_group_does_not_exist $new_group +echo "PASSED" + +echo "TEST: add remove images to consistency group" +image="test_image" +group="test_consistency_group" +create_image $image +create_group $group +add_image_to_group $image $group +check_image_in_group $image $group +remove_image_from_group $image $group +check_image_not_in_group $image $group +remove_group $group +remove_image $image +echo "PASSED" + +echo "TEST: create remove snapshots of consistency group" +image="test_image" +group="test_consistency_group" 
+snap="group_snap" +new_snap="new_group_snap" +sec_snap="group_snap2" +create_image $image +create_group $group +add_image_to_group $image $group +create_snapshot $group $snap +check_snapshot_in_group $group $snap +rename_snapshot $group $snap $new_snap +check_snapshot_not_in_group $group $snap +create_snapshot $group $sec_snap +check_snapshot_in_group $group $sec_snap +rollback_snapshot $group $new_snap +remove_snapshot $group $new_snap +check_snapshot_not_in_group $group $new_snap +remove_snapshot $group $sec_snap +check_snapshot_not_in_group $group $sec_snap +remove_group $group +remove_image $image +echo "PASSED" + +echo "TEST: list snapshots of consistency group" +image="test_image" +group="test_consistency_group" +snap="group_snap" +create_image $image +create_group $group +add_image_to_group $image $group +create_snapshots $group $snap 10 +check_snapshots_count_in_group $group $snap 10 +remove_snapshots $group $snap 10 +create_snapshots $group $snap 100 +check_snapshots_count_in_group $group $snap 100 +remove_snapshots $group $snap 100 +remove_group $group +remove_image $image +echo "PASSED" + +echo "OK" diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh new file mode 100755 index 000000000..6ef06f2b8 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh @@ -0,0 +1,58 @@ +#!/bin/sh -ex +# +# rbd_mirror_bootstrap.sh - test peer bootstrap create/import +# + +RBD_MIRROR_MANUAL_PEERS=1 +RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1} +. $(dirname $0)/rbd_mirror_helpers.sh + +setup + +testlog "TEST: bootstrap cluster2 from cluster1" +# create token on cluster1 and import to cluster2 +TOKEN=${TEMPDIR}/peer-token +TOKEN_2=${TEMPDIR}/peer-token-2 +CEPH_ARGS='' rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${POOL} > ${TOKEN} +CEPH_ARGS='' rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${PARENT_POOL} > ${TOKEN_2} +cmp ${TOKEN} ${TOKEN_2} + +CEPH_ARGS='' rbd --cluster ${CLUSTER2} --pool ${POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-only +CEPH_ARGS='' rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-tx + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +testlog "TEST: verify rx-only direction" +# rx-only peer is added immediately by "rbd mirror pool peer bootstrap import" +rbd --cluster ${CLUSTER2} --pool ${POOL} mirror pool info --format json | jq -e '.peers[0].direction == "rx-only"' +# tx-only peer is added asynchronously by mirror_peer_ping class method +while ! 
rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format json | jq -e '.peers | length > 0'; do + sleep 1 +done +rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format json | jq -e '.peers[0].direction == "tx-only"' + +create_image_and_enable_mirror ${CLUSTER1} ${POOL} image1 + +wait_for_image_replay_started ${CLUSTER2} ${POOL} image1 +write_image ${CLUSTER1} ${POOL} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} image1 + +testlog "TEST: verify rx-tx direction" +# both rx-tx peers are added immediately by "rbd mirror pool peer bootstrap import" +rbd --cluster ${CLUSTER1} --pool ${PARENT_POOL} mirror pool info --format json | jq -e '.peers[0].direction == "rx-tx"' +rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool info --format json | jq -e '.peers[0].direction == "rx-tx"' + +create_image ${CLUSTER1} ${PARENT_POOL} image1 +create_image ${CLUSTER2} ${PARENT_POOL} image2 + +enable_mirror ${CLUSTER1} ${PARENT_POOL} image1 +enable_mirror ${CLUSTER2} ${PARENT_POOL} image2 + +wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL} image1 +write_image ${CLUSTER1} ${PARENT_POOL} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} image1 + +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} image2 +write_image ${CLUSTER2} ${PARENT_POOL} image2 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} image2 diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh new file mode 100755 index 000000000..0ba3c97d7 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh @@ -0,0 +1,38 @@ +#!/bin/sh -ex +# +# rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload +# +# The script is used to compare FSX-generated images between two clusters. +# + +. $(dirname $0)/rbd_mirror_helpers.sh + +trap 'cleanup $?' INT TERM EXIT + +setup_tempdir + +testlog "TEST: wait for all images" +image_count=$(rbd --cluster ${CLUSTER1} --pool ${POOL} ls | wc -l) +retrying_seconds=0 +sleep_seconds=10 +while [ ${retrying_seconds} -le 7200 ]; do + [ $(rbd --cluster ${CLUSTER2} --pool ${POOL} ls | wc -l) -ge ${image_count} ] && break + sleep ${sleep_seconds} + retrying_seconds=$(($retrying_seconds+${sleep_seconds})) +done + +testlog "TEST: snapshot all pool images" +snap_id=`uuidgen` +for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do + create_snapshot ${CLUSTER1} ${POOL} ${image} ${snap_id} +done + +testlog "TEST: wait for snapshots" +for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do + wait_for_snap_present ${CLUSTER2} ${POOL} ${image} ${snap_id} +done + +testlog "TEST: compare image snapshots" +for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do + compare_image_snapshots ${POOL} ${image} +done diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh new file mode 100755 index 000000000..d988987ba --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex +# +# rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload +# +# The script is used to compare FSX-generated images between two clusters. +# + +. 
$(dirname $0)/rbd_mirror_helpers.sh + +setup diff --git a/qa/workunits/rbd/rbd_mirror_ha.sh b/qa/workunits/rbd/rbd_mirror_ha.sh new file mode 100755 index 000000000..37739a83d --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_ha.sh @@ -0,0 +1,210 @@ +#!/bin/sh -ex +# +# rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode +# + +RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7} + +. $(dirname $0)/rbd_mirror_helpers.sh + +setup + +is_leader() +{ + local instance=$1 + local pool=$2 + + test -n "${pool}" || pool=${POOL} + + admin_daemon "${CLUSTER1}:${instance}" \ + rbd mirror status ${pool} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} | + grep '"leader": true' +} + +wait_for_leader() +{ + local s instance + + for s in 1 1 2 4 4 4 4 4 8 8 8 8 16 16 32 64; do + sleep $s + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + is_leader ${instance} || continue + LEADER=${instance} + return 0 + done + done + + LEADER= + return 1 +} + +release_leader() +{ + local pool=$1 + local cmd="rbd mirror leader release" + + test -n "${pool}" && cmd="${cmd} ${pool} ${CLUSTER2}" + + admin_daemon "${CLUSTER1}:${LEADER}" ${cmd} +} + +wait_for_leader_released() +{ + local i + + test -n "${LEADER}" + for i in `seq 10`; do + is_leader ${LEADER} || return 0 + sleep 1 + done + + return 1 +} + +test_replay() +{ + local image + + for image; do + wait_for_image_replay_started ${CLUSTER1}:${LEADER} ${POOL} ${image} + write_image ${CLUSTER2} ${POOL} ${image} 100 + wait_for_replay_complete ${CLUSTER1}:${LEADER} ${CLUSTER2} ${POOL} \ + ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' \ + 'primary_position' \ + "${MIRROR_USER_ID_PREFIX}${LEADER} on $(hostname -s)" + if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} \ + 'down+unknown' + fi + compare_images ${POOL} ${image} + done +} + +testlog "TEST: start first daemon instance and test replay" +start_mirror ${CLUSTER1}:0 +image1=test1 +create_image ${CLUSTER2} ${POOL} ${image1} +LEADER=0 +test_replay ${image1} + +testlog "TEST: release leader and wait it is reacquired" +is_leader 0 ${POOL} +is_leader 0 ${PARENT_POOL} +release_leader ${POOL} +wait_for_leader_released +is_leader 0 ${PARENT_POOL} +wait_for_leader +release_leader +wait_for_leader_released +expect_failure "" is_leader 0 ${PARENT_POOL} +wait_for_leader + +testlog "TEST: start second daemon instance and test replay" +start_mirror ${CLUSTER1}:1 +image2=test2 +create_image ${CLUSTER2} ${POOL} ${image2} +test_replay ${image1} ${image2} + +testlog "TEST: release leader and test it is acquired by secondary" +is_leader 0 ${POOL} +is_leader 0 ${PARENT_POOL} +release_leader ${POOL} +wait_for_leader_released +wait_for_leader +test_replay ${image1} ${image2} +release_leader +wait_for_leader_released +wait_for_leader +test "${LEADER}" = 0 + +testlog "TEST: stop first daemon instance and test replay" +stop_mirror ${CLUSTER1}:0 +image3=test3 +create_image ${CLUSTER2} ${POOL} ${image3} +LEADER=1 +test_replay ${image1} ${image2} ${image3} + +testlog "TEST: start first daemon instance and test replay" +start_mirror ${CLUSTER1}:0 +image4=test4 +create_image ${CLUSTER2} ${POOL} ${image4} +test_replay ${image3} ${image4} + +testlog "TEST: crash leader and test replay" +stop_mirror ${CLUSTER1}:1 -KILL +image5=test5 +create_image ${CLUSTER2} ${POOL} ${image5} +LEADER=0 +test_replay ${image1} ${image4} ${image5} + +testlog "TEST: start crashed leader and test replay" +start_mirror ${CLUSTER1}:1 +image6=test6 +create_image ${CLUSTER2} ${POOL} ${image6} 
+test_replay ${image1} ${image6} + +testlog "TEST: start yet another daemon instance and test replay" +start_mirror ${CLUSTER1}:2 +image7=test7 +create_image ${CLUSTER2} ${POOL} ${image7} +test_replay ${image1} ${image7} + +testlog "TEST: release leader and test it is acquired by secondary" +is_leader 0 +release_leader +wait_for_leader_released +wait_for_leader +test_replay ${image1} ${image2} + +testlog "TEST: stop leader and test replay" +stop_mirror ${CLUSTER1}:${LEADER} +image8=test8 +create_image ${CLUSTER2} ${POOL} ${image8} +prev_leader=${LEADER} +wait_for_leader +test_replay ${image1} ${image8} + +testlog "TEST: start previous leader and test replay" +start_mirror ${CLUSTER1}:${prev_leader} +image9=test9 +create_image ${CLUSTER2} ${POOL} ${image9} +test_replay ${image1} ${image9} + +testlog "TEST: crash leader and test replay" +stop_mirror ${CLUSTER1}:${LEADER} -KILL +image10=test10 +create_image ${CLUSTER2} ${POOL} ${image10} +prev_leader=${LEADER} +wait_for_leader +test_replay ${image1} ${image10} + +testlog "TEST: start previous leader and test replay" +start_mirror ${CLUSTER1}:${prev_leader} +image11=test11 +create_image ${CLUSTER2} ${POOL} ${image11} +test_replay ${image1} ${image11} + +testlog "TEST: start some more daemon instances and test replay" +start_mirror ${CLUSTER1}:3 +start_mirror ${CLUSTER1}:4 +start_mirror ${CLUSTER1}:5 +start_mirror ${CLUSTER1}:6 +image13=test13 +create_image ${CLUSTER2} ${POOL} ${image13} +test_replay ${leader} ${image1} ${image13} + +testlog "TEST: release leader and test it is acquired by secondary" +release_leader +wait_for_leader_released +wait_for_leader +test_replay ${image1} ${image2} + +testlog "TEST: in loop: stop leader and test replay" +for i in 0 1 2 3 4 5; do + stop_mirror ${CLUSTER1}:${LEADER} + wait_for_leader + test_replay ${image1} +done + +stop_mirror ${CLUSTER1}:${LEADER} diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh new file mode 100755 index 000000000..f4961b925 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -0,0 +1,1488 @@ +#!/bin/sh +# +# rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# +# There are several env variables useful when troubleshooting a test failure: +# +# RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes, +# destroy the clusters and remove the temp directory) +# on exit, so it is possible to check the test state +# after failure. +# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory +# (should not exist) instead of running mktemp(1). +# RBD_MIRROR_ARGS - use this to pass additional arguments to started +# rbd-mirror daemons. +# RBD_MIRROR_VARGS - use this to pass additional arguments to vstart.sh +# when starting clusters. +# RBD_MIRROR_INSTANCES - number of daemons to start per cluster +# RBD_MIRROR_CONFIG_KEY - if not empty, use config-key for remote cluster +# secrets +# The cleanup can be done as a separate step, running the script with +# `cleanup ${RBD_MIRROR_TEMDIR}' arguments. +# +# Note, as other workunits tests, rbd_mirror_journal.sh expects to find ceph binaries +# in PATH. 
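+#
+# For example, an HA run that keeps seven rbd-mirror daemons per cluster and
+# preserves its state for later inspection could be started like this (the
+# paths and values below are illustrative only):
+#
+#   RBD_MIRROR_INSTANCES=7 RBD_MIRROR_NOCLEANUP=1 \
+#   RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
+#       ../qa/workunits/rbd/rbd_mirror_ha.sh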
+# +# Thus a typical troubleshooting session: +# +# From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with +# TEMPDIR pointing to a known location: +# +# cd $CEPH_SRC_PATH +# PATH=$CEPH_SRC_PATH:$PATH +# RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ +# ../qa/workunits/rbd/rbd_mirror_journal.sh +# +# After the test failure cd to TEMPDIR and check the current state: +# +# cd /tmp/tmp.rbd_mirror +# ls +# less rbd-mirror.cluster1_daemon.$pid.log +# ceph --cluster cluster1 -s +# ceph --cluster cluster1 -s +# rbd --cluster cluster2 -p mirror ls +# rbd --cluster cluster2 -p mirror journal status --image test +# ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help +# ... +# +# Also you can execute commands (functions) from the script: +# +# cd $CEPH_SRC_PATH +# export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror +# ../qa/workunits/rbd/rbd_mirror_journal.sh status +# ../qa/workunits/rbd/rbd_mirror_journal.sh stop_mirror cluster1 +# ../qa/workunits/rbd/rbd_mirror_journal.sh start_mirror cluster2 +# ../qa/workunits/rbd/rbd_mirror_journal.sh flush cluster2 +# ... +# +# Eventually, run the cleanup: +# +# cd $CEPH_SRC_PATH +# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ +# ../qa/workunits/rbd/rbd_mirror_journal.sh cleanup +# + +if type xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif type xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" + exit 1 +fi + +RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-2} + +CLUSTER1=cluster1 +CLUSTER2=cluster2 +PEER_CLUSTER_SUFFIX= +POOL=mirror +PARENT_POOL=mirror_parent +NS1=ns1 +NS2=ns2 +TEMPDIR= +CEPH_ID=${CEPH_ID:-mirror} +RBD_IMAGE_FEATURES=${RBD_IMAGE_FEATURES:-layering,exclusive-lock,journaling} +MIRROR_USER_ID_PREFIX=${MIRROR_USER_ID_PREFIX:-${CEPH_ID}.} +MIRROR_POOL_MODE=${MIRROR_POOL_MODE:-pool} +MIRROR_IMAGE_MODE=${MIRROR_IMAGE_MODE:-journal} + +export CEPH_ARGS="--id ${CEPH_ID}" + +LAST_MIRROR_INSTANCE=$((${RBD_MIRROR_INSTANCES} - 1)) + +CEPH_ROOT=$(readlink -f $(dirname $0)/../../../src) +CEPH_BIN=. +CEPH_SRC=. +if [ -e CMakeCache.txt ]; then + CEPH_SRC=${CEPH_ROOT} + CEPH_ROOT=${PWD} + CEPH_BIN=./bin + + # needed for ceph CLI under cmake + export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH} + export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind:${CEPH_ROOT}/lib/cython_modules/lib.3 +fi + +# These vars facilitate running this script in an environment with +# ceph installed from packages, like teuthology. These are not defined +# by default. +# +# RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters +# RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror +# running as ceph client $CEPH_ID. If empty, +# this script will start and stop rbd-mirror + +# +# Functions +# + +# Parse a value in format cluster[:instance] and set cluster and instance vars. 
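+# A couple of illustrative calls (hypothetical values, shown only as examples):
+#
+#   set_cluster_instance "cluster1:3" c i   # -> c=cluster1, i=3
+#   set_cluster_instance "cluster1"   c i   # -> c=cluster1, i=0 (default instance)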
+set_cluster_instance() +{ + local val=$1 + local cluster_var_name=$2 + local instance_var_name=$3 + + cluster=${val%:*} + instance=${val##*:} + + if [ "${instance}" = "${val}" ]; then + # instance was not specified, use default + instance=0 + fi + + eval ${cluster_var_name}=${cluster} + eval ${instance_var_name}=${instance} +} + +daemon_asok_file() +{ + local local_cluster=$1 + local cluster=$2 + local instance + + set_cluster_instance "${local_cluster}" local_cluster instance + + echo $(ceph-conf --cluster $local_cluster --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'admin socket') +} + +daemon_pid_file() +{ + local cluster=$1 + local instance + + set_cluster_instance "${cluster}" cluster instance + + echo $(ceph-conf --cluster $cluster --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'pid file') +} + +testlog() +{ + echo $(date '+%F %T') $@ | tee -a "${TEMPDIR}/rbd-mirror.test.log" >&2 +} + +expect_failure() +{ + local expected="$1" ; shift + local out=${TEMPDIR}/expect_failure.out + + if "$@" > ${out} 2>&1 ; then + cat ${out} >&2 + return 1 + fi + + if [ -z "${expected}" ]; then + return 0 + fi + + if ! grep -q "${expected}" ${out} ; then + cat ${out} >&2 + return 1 + fi + + return 0 +} + +mkfname() +{ + echo "$@" | sed -e 's|[/ ]|_|g' +} + +create_users() +{ + local cluster=$1 + + CEPH_ARGS='' ceph --cluster "${cluster}" \ + auth get-or-create client.${CEPH_ID} \ + mon 'profile rbd' osd 'profile rbd' mgr 'profile rbd' >> \ + ${CEPH_ROOT}/run/${cluster}/keyring + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + CEPH_ARGS='' ceph --cluster "${cluster}" \ + auth get-or-create client.${MIRROR_USER_ID_PREFIX}${instance} \ + mon 'profile rbd-mirror' osd 'profile rbd' mgr 'profile rbd' >> \ + ${CEPH_ROOT}/run/${cluster}/keyring + done +} + +setup_cluster() +{ + local cluster=$1 + + CEPH_ARGS='' ${CEPH_SRC}/mstart.sh ${cluster} -n ${RBD_MIRROR_VARGS} + + cd ${CEPH_ROOT} + rm -f ${TEMPDIR}/${cluster}.conf + ln -s $(readlink -f run/${cluster}/ceph.conf) \ + ${TEMPDIR}/${cluster}.conf + + cd ${TEMPDIR} + create_users "${cluster}" + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + cat<<EOF >> ${TEMPDIR}/${cluster}.conf +[client.${MIRROR_USER_ID_PREFIX}${instance}] + admin socket = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.asok + pid file = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.pid + log file = ${TEMPDIR}/rbd-mirror.${cluster}_daemon.${instance}.log +EOF + done +} + +peer_add() +{ + local cluster=$1 ; shift + local pool=$1 ; shift + local client_cluster=$1 ; shift + local remote_cluster="${client_cluster##*@}" + + local uuid_var_name + if [ -n "$1" ]; then + uuid_var_name=$1 ; shift + fi + + local error_code + local peer_uuid + + for s in 1 2 4 8 16 32; do + set +e + peer_uuid=$(rbd --cluster ${cluster} mirror pool peer add \ + ${pool} ${client_cluster} $@) + error_code=$? 
+ set -e + + if [ $error_code -eq 17 ]; then + # raced with a remote heartbeat ping -- remove and retry + sleep $s + peer_uuid=$(rbd mirror pool info --cluster ${cluster} --pool ${pool} --format xml | \ + xmlstarlet sel -t -v "//peers/peer[site_name='${remote_cluster}']/uuid") + + CEPH_ARGS='' rbd --cluster ${cluster} --pool ${pool} mirror pool peer remove ${peer_uuid} + else + test $error_code -eq 0 + if [ -n "$uuid_var_name" ]; then + eval ${uuid_var_name}=${peer_uuid} + fi + return 0 + fi + done + + return 1 +} + +setup_pools() +{ + local cluster=$1 + local remote_cluster=$2 + local mon_map_file + local mon_addr + local admin_key_file + local uuid + + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${POOL} 64 64 + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${PARENT_POOL} 64 64 + + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${POOL} + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${PARENT_POOL} + + if [ -n "${RBD_MIRROR_CONFIG_KEY}" ]; then + PEER_CLUSTER_SUFFIX=-DNE + fi + + CEPH_ARGS='' rbd --cluster ${cluster} mirror pool enable \ + --site-name ${cluster}${PEER_CLUSTER_SUFFIX} ${POOL} ${MIRROR_POOL_MODE} + rbd --cluster ${cluster} mirror pool enable ${PARENT_POOL} image + + rbd --cluster ${cluster} namespace create ${POOL}/${NS1} + rbd --cluster ${cluster} namespace create ${POOL}/${NS2} + + rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS1} ${MIRROR_POOL_MODE} + rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS2} image + + if [ -z ${RBD_MIRROR_MANUAL_PEERS} ]; then + if [ -z ${RBD_MIRROR_CONFIG_KEY} ]; then + peer_add ${cluster} ${POOL} ${remote_cluster} + peer_add ${cluster} ${PARENT_POOL} ${remote_cluster} + else + mon_map_file=${TEMPDIR}/${remote_cluster}.monmap + CEPH_ARGS='' ceph --cluster ${remote_cluster} mon getmap > ${mon_map_file} + mon_addr=$(monmaptool --print ${mon_map_file} | grep -E 'mon\.' | + head -n 1 | sed -E 's/^[0-9]+: ([^ ]+).+$/\1/' | sed -E 's/\/[0-9]+//g') + + admin_key_file=${TEMPDIR}/${remote_cluster}.client.${CEPH_ID}.key + CEPH_ARGS='' ceph --cluster ${remote_cluster} auth get-key client.${CEPH_ID} > ${admin_key_file} + + CEPH_ARGS='' peer_add ${cluster} ${POOL} \ + client.${CEPH_ID}@${remote_cluster}${PEER_CLUSTER_SUFFIX} '' \ + --remote-mon-host "${mon_addr}" --remote-key-file ${admin_key_file} + + peer_add ${cluster} ${PARENT_POOL} client.${CEPH_ID}@${remote_cluster}${PEER_CLUSTER_SUFFIX} uuid + CEPH_ARGS='' rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} mon-host ${mon_addr} + CEPH_ARGS='' rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} key-file ${admin_key_file} + fi + fi +} + +setup_tempdir() +{ + if [ -n "${RBD_MIRROR_TEMDIR}" ]; then + test -d "${RBD_MIRROR_TEMDIR}" || + mkdir "${RBD_MIRROR_TEMDIR}" + TEMPDIR="${RBD_MIRROR_TEMDIR}" + cd ${TEMPDIR} + else + TEMPDIR=`mktemp -d` + fi +} + +setup() +{ + local c + trap 'cleanup $?' 
INT TERM EXIT + + setup_tempdir + if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then + setup_cluster "${CLUSTER1}" + setup_cluster "${CLUSTER2}" + fi + + setup_pools "${CLUSTER1}" "${CLUSTER2}" + setup_pools "${CLUSTER2}" "${CLUSTER1}" + + if [ -n "${RBD_MIRROR_MIN_COMPAT_CLIENT}" ]; then + CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd \ + set-require-min-compat-client ${RBD_MIRROR_MIN_COMPAT_CLIENT} + CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd \ + set-require-min-compat-client ${RBD_MIRROR_MIN_COMPAT_CLIENT} + fi +} + +cleanup() +{ + local error_code=$1 + + set +e + + if [ "${error_code}" -ne 0 ]; then + status + fi + + if [ -z "${RBD_MIRROR_NOCLEANUP}" ]; then + local cluster instance + + CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it + CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it + + for cluster in "${CLUSTER1}" "${CLUSTER2}"; do + stop_mirrors "${cluster}" + done + + if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then + cd ${CEPH_ROOT} + CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER1} + CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER2} + fi + test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" || rm -Rf ${TEMPDIR} + fi + + if [ "${error_code}" -eq 0 ]; then + echo "OK" + else + echo "FAIL" + fi + + exit ${error_code} +} + +start_mirror() +{ + local cluster=$1 + local instance + + set_cluster_instance "${cluster}" cluster instance + + test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return + + rbd-mirror \ + --cluster ${cluster} \ + --id ${MIRROR_USER_ID_PREFIX}${instance} \ + --rbd-mirror-delete-retry-interval=5 \ + --rbd-mirror-image-state-check-interval=5 \ + --rbd-mirror-journal-poll-age=1 \ + --rbd-mirror-pool-replayers-refresh-interval=5 \ + --debug-rbd=30 --debug-journaler=30 \ + --debug-rbd_mirror=30 \ + --daemonize=true \ + ${RBD_MIRROR_ARGS} +} + +start_mirrors() +{ + local cluster=$1 + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + start_mirror "${cluster}:${instance}" + done +} + +stop_mirror() +{ + local cluster=$1 + local sig=$2 + + test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return + + local pid + pid=$(cat $(daemon_pid_file "${cluster}") 2>/dev/null) || : + if [ -n "${pid}" ] + then + kill ${sig} ${pid} + for s in 1 2 4 8 16 32; do + sleep $s + ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' && break + done + ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' + fi + rm -f $(daemon_asok_file "${cluster}" "${CLUSTER1}") + rm -f $(daemon_asok_file "${cluster}" "${CLUSTER2}") + rm -f $(daemon_pid_file "${cluster}") +} + +stop_mirrors() +{ + local cluster=$1 + local sig=$2 + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + stop_mirror "${cluster}:${instance}" "${sig}" + done +} + +admin_daemon() +{ + local cluster=$1 ; shift + local instance + + set_cluster_instance "${cluster}" cluster instance + + local asok_file=$(daemon_asok_file "${cluster}:${instance}" "${cluster}") + test -S "${asok_file}" + + ceph --admin-daemon ${asok_file} $@ +} + +admin_daemons() +{ + local cluster_instance=$1 ; shift + local cluster="${cluster_instance%:*}" + local instance="${cluster_instance##*:}" + local loop_instance + + for s in 0 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + if [ "${instance}" != "${cluster_instance}" ]; then + admin_daemon 
"${cluster}:${instance}" $@ && return 0 + else + for loop_instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + admin_daemon "${cluster}:${loop_instance}" $@ && return 0 + done + fi + done + return 1 +} + +all_admin_daemons() +{ + local cluster=$1 ; shift + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + admin_daemon "${cluster}:${instance}" $@ + done +} + +status() +{ + local cluster daemon image_pool image_ns image + + for cluster in ${CLUSTER1} ${CLUSTER2} + do + echo "${cluster} status" + CEPH_ARGS='' ceph --cluster ${cluster} -s + CEPH_ARGS='' ceph --cluster ${cluster} service dump + CEPH_ARGS='' ceph --cluster ${cluster} service status + echo + + for image_pool in ${POOL} ${PARENT_POOL} + do + for image_ns in "" "${NS1}" "${NS2}" + do + echo "${cluster} ${image_pool} ${image_ns} images" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" ls -l + echo + + echo "${cluster} ${image_pool}${image_ns} mirror pool info" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" mirror pool info + echo + + echo "${cluster} ${image_pool}${image_ns} mirror pool status" + CEPH_ARGS='' rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" mirror pool status --verbose + echo + + for image in `rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" ls 2>/dev/null` + do + echo "image ${image} info" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" info ${image} + echo + echo "image ${image} journal status" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" journal status --image ${image} + echo + echo "image ${image} snapshots" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" snap ls --all ${image} + echo + done + + echo "${cluster} ${image_pool} ${image_ns} rbd_mirroring omap vals" + rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirroring + echo "${cluster} ${image_pool} ${image_ns} rbd_mirror_leader omap vals" + rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirror_leader + echo + done + done + done + + local ret + + for cluster in "${CLUSTER1}" "${CLUSTER2}" + do + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + local pid_file=$(daemon_pid_file ${cluster}:${instance}) + if [ ! -e ${pid_file} ] + then + echo "${cluster} rbd-mirror not running or unknown" \ + "(${pid_file} not exist)" + continue + fi + + local pid + pid=$(cat ${pid_file} 2>/dev/null) || : + if [ -z "${pid}" ] + then + echo "${cluster} rbd-mirror not running or unknown" \ + "(can't find pid using ${pid_file})" + ret=1 + continue + fi + + echo "${daemon} rbd-mirror process in ps output:" + if ps auxww | + awk -v pid=${pid} 'NR == 1 {print} $2 == pid {print; exit 1}' + then + echo + echo "${cluster} rbd-mirror not running" \ + "(can't find pid $pid in ps output)" + ret=1 + continue + fi + echo + + local asok_file=$(daemon_asok_file ${cluster}:${instance} ${cluster}) + if [ ! 
-S "${asok_file}" ] + then + echo "${cluster} rbd-mirror asok is unknown (${asok_file} not exits)" + ret=1 + continue + fi + + echo "${cluster} rbd-mirror status" + ceph --admin-daemon ${asok_file} rbd mirror status + echo + done + done + + return ${ret} +} + +flush() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local cmd="rbd mirror flush" + + if [ -n "${image}" ] + then + cmd="${cmd} ${pool}/${image}" + fi + + admin_daemons "${cluster}" ${cmd} +} + +test_image_replay_state() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local test_state=$4 + local status_result + local current_state=stopped + + status_result=$(admin_daemons "${cluster}" rbd mirror status ${pool}/${image} | grep -i 'state') || return 1 + echo "${status_result}" | grep -i 'Replaying' && current_state=started + test "${test_state}" = "${current_state}" +} + +wait_for_image_replay_state() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state=$4 + local s + + # TODO: add a way to force rbd-mirror to update replayers + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + test_image_replay_state "${cluster}" "${pool}" "${image}" "${state}" && return 0 + done + return 1 +} + +wait_for_image_replay_started() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + wait_for_image_replay_state "${cluster}" "${pool}" "${image}" started +} + +wait_for_image_replay_stopped() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + wait_for_image_replay_state "${cluster}" "${pool}" "${image}" stopped +} + +get_journal_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local id_regexp=$4 + + # Parse line like below, looking for the first position + # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]] + + local status_log=${TEMPDIR}/$(mkfname ${CLUSTER2}-${pool}-${image}.status) + rbd --cluster ${cluster} journal status --image ${pool}/${image} | + tee ${status_log} >&2 + sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*state=connected.*$/\1/p' \ + ${status_log} +} + +get_master_journal_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + get_journal_position "${cluster}" "${pool}" "${image}" '' +} + +get_mirror_journal_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + get_journal_position "${cluster}" "${pool}" "${image}" '..*' +} + +wait_for_journal_replay_complete() +{ + local local_cluster=$1 + local cluster=$2 + local pool=$3 + local image=$4 + local s master_pos mirror_pos last_mirror_pos + local master_tag master_entry mirror_tag mirror_entry + + while true; do + for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do + sleep ${s} + flush "${local_cluster}" "${pool}" "${image}" + master_pos=$(get_master_journal_position "${cluster}" "${pool}" "${image}") + mirror_pos=$(get_mirror_journal_position "${cluster}" "${pool}" "${image}") + test -n "${master_pos}" -a "${master_pos}" = "${mirror_pos}" && return 0 + test "${mirror_pos}" != "${last_mirror_pos}" && break + done + + test "${mirror_pos}" = "${last_mirror_pos}" && return 1 + last_mirror_pos="${mirror_pos}" + + # handle the case where the mirror is ahead of the master + master_tag=$(echo "${master_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2) + mirror_tag=$(echo "${mirror_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2) + master_entry=$(echo "${master_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2) + 
mirror_entry=$(echo "${mirror_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2) + test "${master_tag}" = "${mirror_tag}" -a ${master_entry} -le ${mirror_entry} && return 0 + done + return 1 +} + +mirror_image_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster "${cluster}" mirror image snapshot "${pool}/${image}" +} + +get_newest_mirror_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local log=$4 + + rbd --cluster "${cluster}" snap list --all "${pool}/${image}" --format xml | \ + xmlstarlet sel -t -c "//snapshots/snapshot[namespace/complete='true' and position()=last()]" > \ + ${log} || true +} + +wait_for_snapshot_sync_complete() +{ + local local_cluster=$1 + local cluster=$2 + local pool=$3 + local image=$4 + + local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.status) + local local_status_log=${TEMPDIR}/$(mkfname ${local_cluster}-${pool}-${image}.status) + + mirror_image_snapshot "${cluster}" "${pool}" "${image}" + get_newest_mirror_snapshot "${cluster}" "${pool}" "${image}" "${status_log}" + local snapshot_id=$(xmlstarlet sel -t -v "//snapshot/id" < ${status_log}) + + while true; do + for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do + sleep ${s} + + get_newest_mirror_snapshot "${local_cluster}" "${pool}" "${image}" "${local_status_log}" + local primary_snapshot_id=$(xmlstarlet sel -t -v "//snapshot/namespace/primary_snap_id" < ${local_status_log}) + + test "${snapshot_id}" = "${primary_snapshot_id}" && return 0 + done + + return 1 + done + return 1 +} + +wait_for_replay_complete() +{ + local local_cluster=$1 + local cluster=$2 + local pool=$3 + local image=$4 + + if [ "${MIRROR_IMAGE_MODE}" = "journal" ]; then + wait_for_journal_replay_complete ${local_cluster} ${cluster} ${pool} ${image} + elif [ "${MIRROR_IMAGE_MODE}" = "snapshot" ]; then + wait_for_snapshot_sync_complete ${local_cluster} ${cluster} ${pool} ${image} + else + return 1 + fi +} + + +test_status_in_pool_dir() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern="$4" + local description_pattern="$5" + local service_pattern="$6" + + local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.mirror_status) + CEPH_ARGS='' rbd --cluster ${cluster} mirror image status ${pool}/${image} | + tee ${status_log} >&2 + grep "^ state: .*${state_pattern}" ${status_log} || return 1 + grep "^ description: .*${description_pattern}" ${status_log} || return 1 + + if [ -n "${service_pattern}" ]; then + grep "service: *${service_pattern}" ${status_log} || return 1 + elif echo ${state_pattern} | grep '^up+'; then + grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1 + else + grep "service: " ${status_log} && return 1 + fi + + # recheck using `mirror pool status` command to stress test it. 
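+    # (the verbose pool status must report either a matching state or a
+    # last_update newer than the one captured from the image status above)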
+ + local last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' ${status_log})" + test_mirror_pool_status_verbose \ + ${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" && + return 0 + + echo "'mirror pool status' test failed" >&2 + exit 1 +} + +test_mirror_pool_status_verbose() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern="$4" + local prev_last_update="$5" + + local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}.mirror_status) + + rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \ + > ${status_log} + + local last_update state + last_update=$($XMLSTARLET sel -t -v \ + "//images/image[name='${image}']/last_update" < ${status_log}) + state=$($XMLSTARLET sel -t -v \ + "//images/image[name='${image}']/state" < ${status_log}) + + echo "${state}" | grep "${state_pattern}" || + test "${last_update}" '>' "${prev_last_update}" +} + +wait_for_status_in_pool_dir() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern="$4" + local description_pattern="$5" + local service_pattern="$6" + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + test_status_in_pool_dir ${cluster} ${pool} ${image} "${state_pattern}" \ + "${description_pattern}" "${service_pattern}" && + return 0 + done + return 1 +} + +create_image() +{ + local cluster=$1 ; shift + local pool=$1 ; shift + local image=$1 ; shift + local size=128 + + if [ -n "$1" ]; then + size=$1 + shift + fi + + rbd --cluster ${cluster} create --size ${size} \ + --image-feature "${RBD_IMAGE_FEATURES}" $@ ${pool}/${image} +} + +create_image_and_enable_mirror() +{ + local cluster=$1 ; shift + local pool=$1 ; shift + local image=$1 ; shift + local mode=${1:-${MIRROR_IMAGE_MODE}} + if [ -n "$1" ]; then + shift + fi + + create_image ${cluster} ${pool} ${image} $@ + if [ "${MIRROR_POOL_MODE}" = "image" ] || [ "$pool" = "${PARENT_POOL}" ]; then + enable_mirror ${cluster} ${pool} ${image} ${mode} + fi +} + +enable_journaling() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} feature enable ${pool}/${image} journaling +} + +set_image_meta() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local key=$4 + local val=$5 + + rbd --cluster ${cluster} image-meta set ${pool}/${image} $key $val +} + +compare_image_meta() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local key=$4 + local value=$5 + + test `rbd --cluster ${cluster} image-meta get ${pool}/${image} ${key}` = "${value}" +} + +rename_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local new_name=$4 + + rbd --cluster=${cluster} rename ${pool}/${image} ${pool}/${new_name} +} + +remove_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} snap purge ${pool}/${image} + rbd --cluster=${cluster} rm ${pool}/${image} +} + +remove_image_retry() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + for s in 0 1 2 4 8 16 32; do + sleep ${s} + remove_image ${cluster} ${pool} ${image} && return 0 + done + return 1 +} + +trash_move() { + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} trash move ${pool}/${image} +} + +trash_restore() { + local cluster=$1 + local pool=$2 + local image_id=$3 + + rbd --cluster=${cluster} trash restore ${pool}/${image_id} +} + +clone_image() +{ + local cluster=$1 + local parent_pool=$2 + local parent_image=$3 + local parent_snap=$4 + local clone_pool=$5 + local clone_image=$6 + + shift 6 + + rbd --cluster ${cluster} clone \ + 
${parent_pool}/${parent_image}@${parent_snap} \ + ${clone_pool}/${clone_image} --image-feature "${RBD_IMAGE_FEATURES}" $@ +} + +clone_image_and_enable_mirror() +{ + local cluster=$1 + local parent_pool=$2 + local parent_image=$3 + local parent_snap=$4 + local clone_pool=$5 + local clone_image=$6 + shift 6 + + local mode=${1:-${MIRROR_IMAGE_MODE}} + if [ -n "$1" ]; then + shift + fi + + clone_image ${cluster} ${parent_pool} ${parent_image} ${parent_snap} ${clone_pool} ${clone_image} $@ + enable_mirror ${cluster} ${clone_pool} ${clone_image} ${mode} +} + +disconnect_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} journal client disconnect \ + --image ${pool}/${image} +} + +create_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap create ${pool}/${image}@${snap} +} + +remove_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap rm ${pool}/${image}@${snap} +} + +rename_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + local new_snap=$5 + + rbd --cluster ${cluster} snap rename ${pool}/${image}@${snap} \ + ${pool}/${image}@${new_snap} +} + +purge_snapshots() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} snap purge ${pool}/${image} +} + +protect_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap protect ${pool}/${image}@${snap} +} + +unprotect_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap unprotect ${pool}/${image}@${snap} +} + +unprotect_snapshot_retry() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + for s in 0 1 2 4 8 16 32; do + sleep ${s} + unprotect_snapshot ${cluster} ${pool} ${image} ${snap} && return 0 + done + return 1 +} + +wait_for_snap_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap_name=$4 + local s + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do + sleep ${s} + rbd --cluster ${cluster} info ${pool}/${image}@${snap_name} || continue + return 0 + done + return 1 +} + +test_snap_moved_to_trash() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap_name=$4 + + rbd --cluster ${cluster} snap ls ${pool}/${image} --all | + grep -F " trash (${snap_name})" +} + +wait_for_snap_moved_to_trash() +{ + local s + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do + sleep ${s} + test_snap_moved_to_trash $@ || continue + return 0 + done + return 1 +} + +test_snap_removed_from_trash() +{ + test_snap_moved_to_trash $@ && return 1 + return 0 +} + +wait_for_snap_removed_from_trash() +{ + local s + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do + sleep ${s} + test_snap_removed_from_trash $@ || continue + return 0 + done + return 1 +} + +count_mirror_snaps() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} snap ls ${pool}/${image} --all | + grep -c -F " mirror (" +} + +write_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local count=$4 + local size=$5 + + test -n "${size}" || size=4096 + + rbd --cluster ${cluster} bench ${pool}/${image} --io-type write \ + --io-size ${size} --io-threads 1 --io-total $((size * count)) \ + --io-pattern rand +} + +stress_write_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local duration=$(awk 'BEGIN {srand(); print int(10 * 
rand()) + 5}') + + set +e + timeout ${duration}s ceph_test_rbd_mirror_random_write \ + --cluster ${cluster} ${pool} ${image} \ + --debug-rbd=20 --debug-journaler=20 \ + 2> ${TEMPDIR}/rbd-mirror-random-write.log + error_code=$? + set -e + + if [ $error_code -eq 124 ]; then + return 0 + fi + return 1 +} + +show_diff() +{ + local file1=$1 + local file2=$2 + + xxd ${file1} > ${file1}.xxd + xxd ${file2} > ${file2}.xxd + sdiff -s ${file1}.xxd ${file2}.xxd | head -n 64 + rm -f ${file1}.xxd ${file2}.xxd +} + +compare_images() +{ + local pool=$1 + local image=$2 + local ret=0 + + local rmt_export=${TEMPDIR}/$(mkfname ${CLUSTER2}-${pool}-${image}.export) + local loc_export=${TEMPDIR}/$(mkfname ${CLUSTER1}-${pool}-${image}.export) + + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} export ${pool}/${image} ${rmt_export} + rbd --cluster ${CLUSTER1} export ${pool}/${image} ${loc_export} + if ! cmp ${rmt_export} ${loc_export} + then + show_diff ${rmt_export} ${loc_export} + ret=1 + fi + rm -f ${rmt_export} ${loc_export} + return ${ret} +} + +compare_image_snapshots() +{ + local pool=$1 + local image=$2 + local ret=0 + + local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export + local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export + + for snap_name in $(rbd --cluster ${CLUSTER1} --format xml \ + snap list ${pool}/${image} | \ + $XMLSTARLET sel -t -v "//snapshot/name" | \ + grep -E -v "^\.rbd-mirror\."); do + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} export ${pool}/${image}@${snap_name} ${rmt_export} + rbd --cluster ${CLUSTER1} export ${pool}/${image}@${snap_name} ${loc_export} + if ! cmp ${rmt_export} ${loc_export} + then + show_diff ${rmt_export} ${loc_export} + ret=1 + fi + done + rm -f ${rmt_export} ${loc_export} + return ${ret} +} + +demote_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image demote ${pool}/${image} +} + +promote_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local force=$4 + + rbd --cluster=${cluster} mirror image promote ${pool}/${image} ${force} +} + +set_pool_mirror_mode() +{ + local cluster=$1 + local pool=$2 + local mode=${3:-${MIRROR_POOL_MODE}} + + rbd --cluster=${cluster} mirror pool enable ${pool} ${mode} +} + +disable_mirror() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image disable ${pool}/${image} +} + +enable_mirror() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local mode=${4:-${MIRROR_IMAGE_MODE}} + + rbd --cluster=${cluster} mirror image enable ${pool}/${image} ${mode} + # Display image info including the global image id for debugging purpose + rbd --cluster=${cluster} info ${pool}/${image} +} + +test_image_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local test_state=$4 + local image_id=$5 + local current_state=deleted + local current_image_id + + current_image_id=$(get_image_id ${cluster} ${pool} ${image}) + test -n "${current_image_id}" && + test -z "${image_id}" -o "${image_id}" = "${current_image_id}" && + current_state=present + + test "${test_state}" = "${current_state}" +} + +wait_for_image_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state=$4 + local image_id=$5 + local s + + test -n "${image_id}" || + image_id=$(get_image_id ${cluster} ${pool} ${image}) + + # TODO: add a way to force rbd-mirror to update replayers + for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do + sleep ${s} + test_image_present \ + "${cluster}" "${pool}" 
"${image}" "${state}" "${image_id}" && + return 0 + done + return 1 +} + +get_image_id() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} info ${pool}/${image} | + sed -ne 's/^.*block_name_prefix: rbd_data\.//p' +} + +request_resync_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local image_id_var_name=$4 + + eval "${image_id_var_name}='$(get_image_id ${cluster} ${pool} ${image})'" + eval 'test -n "$'${image_id_var_name}'"' + + rbd --cluster=${cluster} mirror image resync ${pool}/${image} +} + +get_image_data_pool() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} info ${pool}/${image} | + awk '$1 == "data_pool:" {print $2}' +} + +get_clone_format() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} info ${pool}/${image} | + awk 'BEGIN { + format = 1 + } + $1 == "parent:" { + parent = $2 + } + /op_features: .*clone-child/ { + format = 2 + } + END { + if (!parent) exit 1 + print format + }' +} + +list_omap_keys() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + + rados --cluster ${cluster} -p ${pool} listomapkeys ${obj_name} +} + +count_omap_keys_with_filter() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + local filter=$4 + + list_omap_keys ${cluster} ${pool} ${obj_name} | grep -c ${filter} +} + +wait_for_omap_keys() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + local filter=$4 + + for s in 0 1 2 2 4 4 8 8 8 16 16 32; do + sleep $s + + set +e + test "$(count_omap_keys_with_filter ${cluster} ${pool} ${obj_name} ${filter})" = 0 + error_code=$? + set -e + + if [ $error_code -eq 0 ]; then + return 0 + fi + done + + return 1 +} + +wait_for_image_in_omap() +{ + local cluster=$1 + local pool=$2 + + wait_for_omap_keys ${cluster} ${pool} rbd_mirroring status_global + wait_for_omap_keys ${cluster} ${pool} rbd_mirroring image_ + wait_for_omap_keys ${cluster} ${pool} rbd_mirror_leader image_map +} + +# +# Main +# + +if [ "$#" -gt 0 ] +then + if [ -z "${RBD_MIRROR_TEMDIR}" ] + then + echo "RBD_MIRROR_TEMDIR is not set" >&2 + exit 1 + fi + + TEMPDIR="${RBD_MIRROR_TEMDIR}" + cd ${TEMPDIR} + $@ + exit $? +fi diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh new file mode 100755 index 000000000..54f6aeec8 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_journal.sh @@ -0,0 +1,614 @@ +#!/bin/sh -ex +# +# rbd_mirror_journal.sh - test rbd-mirror daemon in journal-based mirroring mode +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# + +. 
$(dirname $0)/rbd_mirror_helpers.sh + +setup + +testlog "TEST: add image and test replay" +start_mirrors ${CLUSTER1} +image=test +create_image ${CLUSTER2} ${POOL} ${image} +set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1" +set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown' +fi +compare_images ${POOL} ${image} +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1" +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2" + +testlog "TEST: stop mirror, add image, start mirror and test replay" +stop_mirrors ${CLUSTER1} +image1=test1 +create_image ${CLUSTER2} ${POOL} ${image1} +write_image ${CLUSTER2} ${POOL} ${image1} 100 +start_mirrors ${CLUSTER1} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image1} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' 'primary_position' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown' +fi +compare_images ${POOL} ${image1} + +testlog "TEST: test the first image is replaying after restart" +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: stop/start/restart mirror via admin socket" + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror start + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + 
all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + + all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + flush ${CLUSTER1} + all_admin_daemons ${CLUSTER1} rbd mirror status +fi + +remove_image_retry ${CLUSTER2} ${POOL} ${image1} + +testlog "TEST: test image rename" +new_name="${image}_RENAMED" +rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name} +admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +rename_image ${CLUSTER2} ${POOL} ${new_name} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: test trash move restore" +image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image}) +trash_move ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +trash_restore ${CLUSTER2} ${POOL} ${image_id} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 
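+
+# Note: the failover/failback section below drives planned primary switches.
+# The demote_image/promote_image helpers defined earlier in
+# rbd_mirror_helpers.sh are thin wrappers around the rbd CLI; a minimal
+# hand-run sketch of one failover/failback cycle (using this script's own
+# variables) would be roughly:
+#
+#   rbd --cluster ${CLUSTER2} mirror image demote ${POOL}/${image}    # old primary
+#   rbd --cluster ${CLUSTER1} mirror image promote ${POOL}/${image}   # new primary
+#   rbd --cluster ${CLUSTER1} mirror image demote ${POOL}/${image}    # fail back
+#   rbd --cluster ${CLUSTER2} mirror image promote ${POOL}/${image}
+#
+# Promoting with --force while the peer is still primary is only done in the
+# force-promote and split-brain tests further down, where the resulting
+# 'split-brain' error state is expected and is cleared via an image resync.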
+ +testlog "TEST: failover and failback" +start_mirrors ${CLUSTER2} + +# demote and promote same cluster +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +# failover (unmodified) +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + +# failback (unmodified) +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +# failover +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} +write_image ${CLUSTER1} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +# failback +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +testlog "TEST: failover / failback loop" +for i in `seq 1 20`; do + demote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + 
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' + demote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +done + +testlog "TEST: force promote" +force_promote_image=test_force_promote +create_image ${CLUSTER2} ${POOL} ${force_promote_image} +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force' +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} +remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} + +testlog "TEST: cloned images" +testlog " - default" +parent_image=test_parent +parent_snap=snap +create_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} +write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +clone_image=test_clone +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image} +write_image ${CLUSTER2} ${POOL} ${clone_image} 100 + +enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} journal +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} 'up+replaying' 'primary_position' +compare_images ${PARENT_POOL} ${parent_image} + +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${clone_image} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} + +testlog " - clone v1" +clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image}1 + +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ + ${clone_image}_v1 --rbd-default-clone-format 1 +test $(get_clone_format 
${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 +unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2" +parent_snap=snap_v2 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ + ${clone_image}_v2 --rbd-default-clone-format 2 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2 + +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2 +wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted' +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2 non-primary" +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ + ${clone_image}_v2 --rbd-default-clone-format 2 +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} + +testlog "TEST: data pool" +dp_image=test_data_pool +create_image ${CLUSTER2} ${POOL} ${dp_image} 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${dp_image}@snap1 +compare_images ${POOL} ${dp_image}@snap2 +compare_images ${POOL} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} + +testlog "TEST: disable mirroring / delete non-primary image" +image2=test2 +image3=test3 +image4=test4 +image5=test5 +for i in ${image2} ${image3} ${image4} ${image5}; do + create_image ${CLUSTER2} ${POOL} ${i} + write_image ${CLUSTER2} ${POOL} ${i} 100 + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + create_snapshot ${CLUSTER2} 
${POOL} ${i} 'snap2' + if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + fi + write_image ${CLUSTER2} ${POOL} ${i} 100 + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' +done + +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' +for i in ${image2} ${image4}; do + disable_mirror ${CLUSTER2} ${POOL} ${i} +done + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2' +for i in ${image3} ${image5}; do + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +for i in ${image2} ${image3} ${image4} ${image5}; do + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' +done + +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' +for i in ${image2} ${image4}; do + enable_journaling ${CLUSTER2} ${POOL} ${i} + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${i} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${i} + compare_images ${POOL} ${i} +done + +testlog "TEST: remove mirroring pool" +pool=pool_to_remove +for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${pool} 16 16 + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${pool} + rbd --cluster ${cluster} mirror pool enable ${pool} pool +done +peer_add ${CLUSTER1} ${pool} ${CLUSTER2} +peer_add ${CLUSTER2} ${pool} ${CLUSTER1} +rdp_image=test_remove_data_pool +create_image ${CLUSTER2} ${pool} ${image} 128 +create_image ${CLUSTER2} ${POOL} ${rdp_image} 128 --data-pool ${pool} +write_image ${CLUSTER2} ${pool} ${image} 100 +write_image ${CLUSTER2} ${POOL} ${rdp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${pool} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${pool} ${image} 'up+replaying' 'primary_position' +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${rdp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${rdp_image} 'up+replaying' 'primary_position' +for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool rm ${pool} ${pool} --yes-i-really-really-mean-it +done +remove_image_retry ${CLUSTER2} ${POOL} ${rdp_image} +wait_for_image_present ${CLUSTER1} ${POOL} ${rdp_image} 'deleted' +for i in 0 1 2 4 8 8 8 8 16 16; do + sleep $i + admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} || break +done +admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} && false + +testlog "TEST: snapshot rename" +snap_name='snap_rename' +create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" +for i in `seq 1 20`; do + rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}" +done +wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' +for i in ${image2} ${image4}; do + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +testlog "TEST: disable mirror while daemon is stopped" +stop_mirrors ${CLUSTER1} +stop_mirrors ${CLUSTER2} +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' +disable_mirror ${CLUSTER2} ${POOL} ${image} +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + 
test_image_present ${CLUSTER1} ${POOL} ${image} 'present' +fi +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' +enable_journaling ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: non-default namespace image mirroring" +testlog " - replay" +create_image ${CLUSTER2} ${POOL}/${NS1} ${image} +create_image ${CLUSTER2} ${POOL}/${NS2} ${image} +enable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} journal +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS2} ${image} +write_image ${CLUSTER2} ${POOL}/${NS1} ${image} 100 +write_image ${CLUSTER2} ${POOL}/${NS2} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS2} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL}/${NS1} ${image} +compare_images ${POOL}/${NS2} ${image} + +testlog " - disable mirroring / delete image" +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} +disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' +wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' +remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} + +testlog " - data pool" +dp_image=test_data_pool +create_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' 'primary_position' +compare_images ${POOL}/${NS1} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} + +testlog "TEST: simple image resync" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +testlog "TEST: image resync while replayer is stopped" +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + request_resync_image ${CLUSTER1} ${POOL} ${image} image_id + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' + compare_images ${POOL} 
${image} +fi + +testlog "TEST: request image resync while daemon is offline" +stop_mirrors ${CLUSTER1} +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: client disconnect" +image=laggy +create_image ${CLUSTER2} ${POOL} ${image} 128 --journal-object-size 64K +write_image ${CLUSTER2} ${POOL} ${image} 10 + +testlog " - replay stopped after disconnect" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +disconnect_image ${CLUSTER2} ${POOL} ${image} +test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' + +testlog " - replay started after resync requested" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +compare_images ${POOL} ${image} + +testlog " - disconnected after max_concurrent_object_sets reached" +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_journal_max_concurrent_object_sets 1 + write_image ${CLUSTER2} ${POOL} ${image} 20 16384 + write_image ${CLUSTER2} ${POOL} ${image} 20 16384 + test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_journal_max_concurrent_object_sets 0 + + testlog " - replay is still stopped (disconnected) after restart" + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' +fi + +testlog " - replay started after resync requested" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +compare_images ${POOL} ${image} + +testlog " - rbd_mirroring_resync_after_disconnect config option" +set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect true +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +image_id=$(get_image_id ${CLUSTER1} ${POOL} ${image}) +disconnect_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 
'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +compare_images ${POOL} ${image} +set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect false +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +disconnect_image ${CLUSTER2} ${POOL} ${image} +test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: split-brain" +image=split-brain +create_image ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +promote_image ${CLUSTER1} ${POOL} ${image} --force +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${image} 10 +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed" +start_mirrors ${CLUSTER2} +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + # teuthology will trash the daemon + testlog "TEST: no blocklists" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries" +fi diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh new file mode 100755 index 000000000..c70d48b09 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh @@ -0,0 +1,517 @@ +#!/bin/sh -ex +# +# rbd_mirror_snapshot.sh - test rbd-mirror daemon in snapshot-based mirroring mode +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# + +MIRROR_POOL_MODE=image +MIRROR_IMAGE_MODE=snapshot + +. 
$(dirname $0)/rbd_mirror_helpers.sh + +setup + +testlog "TEST: add image and test replay" +start_mirrors ${CLUSTER1} +image=test +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1" +set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown' +fi +compare_images ${POOL} ${image} +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1" +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2" + +testlog "TEST: stop mirror, add image, start mirror and test replay" +stop_mirrors ${CLUSTER1} +image1=test1 +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image1} +write_image ${CLUSTER2} ${POOL} ${image1} 100 +start_mirrors ${CLUSTER1} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image1} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown' +fi +compare_images ${POOL} ${image1} + +testlog "TEST: test the first image is replaying after restart" +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: stop/start/restart mirror via admin socket" + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror start + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons 
${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + + all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + flush ${CLUSTER1} + all_admin_daemons ${CLUSTER1} rbd mirror status +fi + +remove_image_retry ${CLUSTER2} ${POOL} ${image1} + +testlog "TEST: test image rename" +new_name="${image}_RENAMED" +rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} +mirror_image_snapshot ${CLUSTER2} ${POOL} ${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name} +admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +rename_image ${CLUSTER2} ${POOL} ${new_name} ${image} +mirror_image_snapshot ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: test trash move restore" +image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image}) +trash_move ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +trash_restore ${CLUSTER2} ${POOL} ${image_id} +enable_mirror ${CLUSTER2} ${POOL} ${image} snapshot +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 
100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + +testlog "TEST: failover and failback" +start_mirrors ${CLUSTER2} + +# demote and promote same cluster +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +# failover (unmodified) +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + +# failback (unmodified) +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +# failover +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} +write_image ${CLUSTER1} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +# failback +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +testlog "TEST: failover / failback loop" +for i in `seq 1 20`; do + demote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER1} ${POOL} ${image} + 
wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' + demote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +done +# check that demote (or other mirror snapshots) don't pile up +test "$(count_mirror_snaps ${CLUSTER1} ${POOL} ${image})" -le 3 +test "$(count_mirror_snaps ${CLUSTER2} ${POOL} ${image})" -le 3 + +testlog "TEST: force promote" +force_promote_image=test_force_promote +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force' +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} +remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} + +testlog "TEST: cloned images" +testlog " - default" +parent_image=test_parent +parent_snap=snap +create_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} +write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +clone_image=test_clone +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image} +write_image ${CLUSTER2} ${POOL} ${clone_image} 100 +enable_mirror ${CLUSTER2} ${POOL} ${clone_image} snapshot + +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} 'up+replaying' +compare_images ${PARENT_POOL} ${parent_image} + +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' +compare_images ${POOL} ${clone_image} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} + +testlog " - clone v1" 
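+# Illustrative note: the v1/v2 sub-tests below pin the clone format via
+# --rbd-default-clone-format rather than relying on the cluster default. The
+# get_clone_format helper above infers the format from 'rbd info' output
+# (format 2 when the clone-child op feature is present, format 1 otherwise),
+# so each sub-test can assert that the replayed copy reports the same format.
+# A rough sketch of the v1 case, using this script's own variables:
+#
+#   rbd --cluster ${CLUSTER2} clone \
+#       ${PARENT_POOL}/${parent_image}@${parent_snap} ${POOL}/${clone_image}_v1 \
+#       --rbd-default-clone-format 1      # v1 requires a protected parent snapshot
+#
+# whereas the v2 case allows removing the parent snapshot while clones still
+# exist (it is moved to the trash, as the snap-trash checks below verify).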
+clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}1 + +clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}_v1 snapshot --rbd-default-clone-format 1 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 +unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2" +parent_snap=snap_v2 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}_v2 snapshot --rbd-default-clone-format 2 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2 + +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2 +wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted' +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2 non-primary" +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}_v2 snapshot --rbd-default-clone-format 2 +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} + +testlog "TEST: data pool" +dp_image=test_data_pool +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${dp_image} snapshot 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +wait_for_replay_complete 
${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' +compare_images ${POOL} ${dp_image}@snap1 +compare_images ${POOL} ${dp_image}@snap2 +compare_images ${POOL} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} + +testlog "TEST: disable mirroring / delete non-primary image" +image2=test2 +image3=test3 +image4=test4 +image5=test5 +for i in ${image2} ${image3} ${image4} ${image5}; do + create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${i} + write_image ${CLUSTER2} ${POOL} ${i} 100 + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + fi + write_image ${CLUSTER2} ${POOL} ${i} 100 + mirror_image_snapshot ${CLUSTER2} ${POOL} ${i} + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' +done + +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' +for i in ${image2} ${image4}; do + disable_mirror ${CLUSTER2} ${POOL} ${i} +done + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2' +for i in ${image3} ${image5}; do + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +for i in ${image2} ${image3} ${image4} ${image5}; do + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' +done + +testlog "TEST: snapshot rename" +snap_name='snap_rename' +enable_mirror ${CLUSTER2} ${POOL} ${image2} +create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" +for i in `seq 1 20`; do + rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}" +done +mirror_image_snapshot ${CLUSTER2} ${POOL} ${image2} +wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' +for i in ${image2} ${image4}; do + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +testlog "TEST: disable mirror while daemon is stopped" +stop_mirrors ${CLUSTER1} +stop_mirrors ${CLUSTER2} +disable_mirror ${CLUSTER2} ${POOL} ${image} +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + test_image_present ${CLUSTER1} ${POOL} ${image} 'present' +fi +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: non-default namespace image mirroring" +testlog " - replay" +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${image} +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS2} ${image} +write_image ${CLUSTER2} ${POOL}/${NS1} ${image} 100 +write_image ${CLUSTER2} ${POOL}/${NS2} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS2} ${image} 'up+replaying' +compare_images 
${POOL}/${NS1} ${image} +compare_images ${POOL}/${NS2} ${image} + +testlog " - disable mirroring / delete image" +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} +disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' +wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' +remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} + +testlog " - data pool" +dp_image=test_data_pool +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${dp_image} snapshot 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' +compare_images ${POOL}/${NS1} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} + +testlog "TEST: simple image resync" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: image resync while replayer is stopped" + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + request_resync_image ${CLUSTER1} ${POOL} ${image} image_id + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + compare_images ${POOL} ${image} +fi + +testlog "TEST: request image resync while daemon is offline" +stop_mirrors ${CLUSTER1} +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: split-brain" +image=split-brain +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +promote_image ${CLUSTER1} ${POOL} ${image} --force +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${image} 10 +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +remove_image_retry 
${CLUSTER2} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed" +start_mirrors ${CLUSTER2} +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + # teuthology will trash the daemon + testlog "TEST: no blocklists" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries" +fi diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh new file mode 100755 index 000000000..cb79aba7e --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_stress.sh @@ -0,0 +1,221 @@ +#!/bin/sh -ex +# +# rbd_mirror_stress.sh - stress test rbd-mirror daemon +# +# The following additional environment variables affect the test: +# +# RBD_MIRROR_REDUCE_WRITES - if not empty, don't run the stress bench write +# tool during the many image test +# + +IMAGE_COUNT=50 +export LOCKDEP=0 + +. $(dirname $0)/rbd_mirror_helpers.sh + +setup + +create_snap() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap_name=$4 + + rbd --cluster ${cluster} -p ${pool} snap create ${image}@${snap_name} \ + --debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-snap-create.log +} + +compare_image_snaps() +{ + local pool=$1 + local image=$2 + local snap_name=$3 + local ret=0 + + local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export + local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export + + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} -p ${pool} export ${image}@${snap_name} ${rmt_export} + rbd --cluster ${CLUSTER1} -p ${pool} export ${image}@${snap_name} ${loc_export} + if ! 
cmp ${rmt_export} ${loc_export} + then + show_diff ${rmt_export} ${loc_export} + ret=1 + fi + rm -f ${rmt_export} ${loc_export} + return ${ret} +} + +wait_for_pool_images() +{ + local cluster=$1 + local pool=$2 + local image_count=$3 + local s + local count + local last_count=0 + + while true; do + for s in `seq 1 40`; do + test $s -ne 1 && sleep 30 + count=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'images: ' | cut -d' ' -f 2) + test "${count}" = "${image_count}" && return 0 + + # reset timeout if making forward progress + test $count -ne $last_count && break + done + + test $count -eq $last_count && break + last_count=$count + done + rbd --cluster ${cluster} -p ${pool} mirror pool status --verbose >&2 + return 1 +} + +wait_for_pool_healthy() +{ + local cluster=$1 + local pool=$2 + local s + local state + + for s in `seq 1 40`; do + test $s -ne 1 && sleep 30 + state=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'image health:' | cut -d' ' -f 3) + test "${state}" = "ERROR" && break + test "${state}" = "OK" && return 0 + done + rbd --cluster ${cluster} -p ${pool} mirror pool status --verbose >&2 + return 1 +} + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +testlog "TEST: add image and test replay after client crashes" +image=test +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${MIRROR_IMAGE_MODE} '512M' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +clean_snap_name= +for i in `seq 1 10` +do + stress_write_image ${CLUSTER2} ${POOL} ${image} + + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + snap_name="snap${i}" + create_snap ${CLUSTER2} ${POOL} ${image} ${snap_name} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name} + + if [ -n "${clean_snap_name}" ]; then + compare_image_snaps ${POOL} ${image} ${clean_snap_name} + fi + compare_image_snaps ${POOL} ${image} ${snap_name} + + clean_snap_name="snap${i}-clean" + create_snap ${CLUSTER2} ${POOL} ${image} ${clean_snap_name} +done + +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${clean_snap_name} + +for i in `seq 1 10` +do + snap_name="snap${i}" + compare_image_snaps ${POOL} ${image} ${snap_name} + + snap_name="snap${i}-clean" + compare_image_snaps ${POOL} ${image} ${snap_name} +done + +for i in `seq 1 10` +do + snap_name="snap${i}" + remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name} + + snap_name="snap${i}-clean" + remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name} +done + +remove_image_retry ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' + +testlog "TEST: create many images" +snap_name="snap" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${MIRROR_IMAGE_MODE} '128M' + if [ -n "${RBD_MIRROR_REDUCE_WRITES}" ]; then + write_image ${CLUSTER2} ${POOL} ${image} 100 + else + stress_write_image ${CLUSTER2} ${POOL} ${image} + fi +done + +wait_for_pool_images ${CLUSTER2} ${POOL} ${IMAGE_COUNT} +wait_for_pool_healthy ${CLUSTER2} ${POOL} + +wait_for_pool_images ${CLUSTER1} ${POOL} ${IMAGE_COUNT} +wait_for_pool_healthy ${CLUSTER1} ${POOL} + +testlog "TEST: compare many images" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + create_snap 
${CLUSTER2} ${POOL} ${image} ${snap_name} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name} + compare_image_snaps ${POOL} ${image} ${snap_name} +done + +testlog "TEST: delete many images" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name} + remove_image_retry ${CLUSTER2} ${POOL} ${image} +done + +testlog "TEST: image deletions should propagate" +wait_for_pool_images ${CLUSTER1} ${POOL} 0 +wait_for_pool_healthy ${CLUSTER1} ${POOL} 0 +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +done + +testlog "TEST: delete images during bootstrap" +set_pool_mirror_mode ${CLUSTER1} ${POOL} 'image' +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' + +start_mirror ${CLUSTER1} +image=test + +for i in `seq 1 10` +do + image="image_${i}" + create_image ${CLUSTER2} ${POOL} ${image} '512M' + enable_mirror ${CLUSTER2} ${POOL} ${image} + + stress_write_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + + disable_mirror ${CLUSTER2} ${POOL} ${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' + purge_snapshots ${CLUSTER2} ${POOL} ${image} + remove_image_retry ${CLUSTER2} ${POOL} ${image} +done + +testlog "TEST: check if removed images' OMAP are removed" + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} diff --git a/qa/workunits/rbd/rbd_support_module_recovery.sh b/qa/workunits/rbd/rbd_support_module_recovery.sh new file mode 100755 index 000000000..e9defced2 --- /dev/null +++ b/qa/workunits/rbd/rbd_support_module_recovery.sh @@ -0,0 +1,77 @@ +#!/bin/bash +set -ex + +POOL=rbd +IMAGE_PREFIX=image +NUM_IMAGES=20 +RUN_TIME=3600 + +rbd mirror pool enable ${POOL} image +rbd mirror pool peer add ${POOL} dummy + +# Create images and schedule their mirror snapshots +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + rbd create -s 1G --image-feature exclusive-lock ${POOL}/${IMAGE_PREFIX}$i + rbd mirror image enable ${POOL}/${IMAGE_PREFIX}$i snapshot + rbd mirror snapshot schedule add -p ${POOL} --image ${IMAGE_PREFIX}$i 1m +done + +# Run fio workloads on images via kclient +# Test the recovery of the rbd_support module and its scheduler from their +# librbd client being blocklisted while a exclusive lock gets passed around +# between their librbd client and a kclient trying to take mirror snapshots +# and perform I/O on the same image. 
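+#
+# For reference, the client address that gets blocklisted further down is
+# looked up from `ceph mgr dump`; its active_clients entries look roughly like
+# this sketch (the addr and nonce values here are purely illustrative):
+#
+#   "active_clients": [
+#     {"name": "rbd_support",
+#      "addrvec": [{"type": "v2", "addr": "192.168.0.10:0", "nonce": 4129188}]}
+#   ]
+#
+# The jq pipeline below joins addr and nonce into "192.168.0.10:0/4129188",
+# which is the form passed to `ceph osd blocklist add`.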
+for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + DEVS[$i]=$(sudo rbd device map ${POOL}/${IMAGE_PREFIX}$i) + fio --name=fiotest --filename=${DEVS[$i]} --rw=randrw --bs=4K --direct=1 \ + --ioengine=libaio --iodepth=2 --runtime=43200 --time_based \ + &> /dev/null & +done + +# Repeatedly blocklist rbd_support module's client ~10s after the module +# recovers from previous blocklisting +CURRENT_TIME=$(date +%s) +END_TIME=$((CURRENT_TIME + RUN_TIME)) +PREV_CLIENT_ADDR="" +CLIENT_ADDR="" +while ((CURRENT_TIME <= END_TIME)); do + if [[ -n "${CLIENT_ADDR}" ]] && + [[ "${CLIENT_ADDR}" != "${PREV_CLIENT_ADDR}" ]]; then + ceph osd blocklist add ${CLIENT_ADDR} + # Confirm rbd_support module's client is blocklisted + ceph osd blocklist ls | grep -q ${CLIENT_ADDR} + PREV_CLIENT_ADDR=${CLIENT_ADDR} + fi + sleep 10 + CLIENT_ADDR=$(ceph mgr dump | + jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + CURRENT_TIME=$(date +%s) +done + +# Confirm that rbd_support module recovered from repeated blocklisting +# Check that you can add a mirror snapshot schedule after a few retries +for ((i = 1; i <= 24; i++)); do + rbd mirror snapshot schedule add -p ${POOL} \ + --image ${IMAGE_PREFIX}1 2m && break + sleep 10 +done +rbd mirror snapshot schedule ls -p ${POOL} --image ${IMAGE_PREFIX}1 | + grep 'every 2m' +# Verify that the schedule present before client blocklisting is preserved +rbd mirror snapshot schedule ls -p ${POOL} --image ${IMAGE_PREFIX}1 | + grep 'every 1m' +rbd mirror snapshot schedule rm -p ${POOL} --image ${IMAGE_PREFIX}1 2m +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + rbd mirror snapshot schedule rm -p ${POOL} --image ${IMAGE_PREFIX}$i 1m +done + +# cleanup +killall fio || true +wait +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + sudo rbd device unmap ${DEVS[$i]} +done + +echo OK diff --git a/qa/workunits/rbd/read-flags.sh b/qa/workunits/rbd/read-flags.sh new file mode 100755 index 000000000..7d787ce67 --- /dev/null +++ b/qa/workunits/rbd/read-flags.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -ex + +# create a snapshot, then export it and check that setting read flags works +# by looking at --debug-ms output + +function clean_up { + rm -f test.log || true + rbd snap remove test@snap || true + rbd rm test || true +} + +function test_read_flags { + local IMAGE=$1 + local SET_BALANCED=$2 + local SET_LOCALIZED=$3 + local EXPECT_BALANCED=$4 + local EXPECT_LOCALIZED=$5 + + local EXTRA_ARGS="--log-file test.log --debug-ms 1 --no-log-to-stderr" + if [ "$SET_BALANCED" = 'y' ]; then + EXTRA_ARGS="$EXTRA_ARGS --rbd-balance-snap-reads" + elif [ "$SET_LOCALIZED" = 'y' ]; then + EXTRA_ARGS="$EXTRA_ARGS --rbd-localize-snap-reads" + fi + + rbd export $IMAGE - $EXTRA_ARGS > /dev/null + if [ "$EXPECT_BALANCED" = 'y' ]; then + grep -q balance_reads test.log + else + grep -L balance_reads test.log | grep -q test.log + fi + if [ "$EXPECT_LOCALIZED" = 'y' ]; then + grep -q localize_reads test.log + else + grep -L localize_reads test.log | grep -q test.log + fi + rm -f test.log + +} + +clean_up + +trap clean_up INT TERM EXIT + +rbd create --image-feature layering -s 10 test +rbd snap create test@snap + +# export from non snapshot with or without settings should not have flags +test_read_flags test n n n n +test_read_flags test y y n n + +# export from snapshot should have read flags in log if they are set +test_read_flags test@snap n n n n +test_read_flags test@snap y n y n +test_read_flags test@snap n y n y + +# balanced_reads happens to take 
priority over localize_reads +test_read_flags test@snap y y y n + +echo OK diff --git a/qa/workunits/rbd/simple_big.sh b/qa/workunits/rbd/simple_big.sh new file mode 100755 index 000000000..70aafda4c --- /dev/null +++ b/qa/workunits/rbd/simple_big.sh @@ -0,0 +1,12 @@ +#!/bin/sh -ex + +mb=100000 + +rbd create foo --size $mb +DEV=$(sudo rbd map foo) +dd if=/dev/zero of=$DEV bs=1M count=$mb +dd if=$DEV of=/dev/null bs=1M count=$mb +sudo rbd unmap $DEV +rbd rm foo + +echo OK diff --git a/qa/workunits/rbd/test_admin_socket.sh b/qa/workunits/rbd/test_admin_socket.sh new file mode 100755 index 000000000..6b960787b --- /dev/null +++ b/qa/workunits/rbd/test_admin_socket.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash +set -ex + +TMPDIR=/tmp/rbd_test_admin_socket$$ +mkdir $TMPDIR +trap "rm -fr $TMPDIR" 0 + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function rbd_watch_out_file() +{ + echo ${TMPDIR}/rbd_watch_$1.out +} + +function rbd_watch_pid_file() +{ + echo ${TMPDIR}/rbd_watch_$1.pid +} + +function rbd_watch_fifo() +{ + echo ${TMPDIR}/rbd_watch_$1.fifo +} + +function rbd_watch_asok() +{ + echo ${TMPDIR}/rbd_watch_$1.asok +} + +function rbd_get_perfcounter() +{ + local image=$1 + local counter=$2 + local name + + name=$(ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) \ + perf schema | $XMLSTARLET el -d3 | + grep "/librbd-.*-${image}/${counter}\$") + test -n "${name}" || return 1 + + ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) perf dump | + $XMLSTARLET sel -t -m "${name}" -v . +} + +function rbd_check_perfcounter() +{ + local image=$1 + local counter=$2 + local expected_val=$3 + local val= + + val=$(rbd_get_perfcounter ${image} ${counter}) + + test "${val}" -eq "${expected_val}" +} + +function rbd_watch_start() +{ + local image=$1 + local asok=$(rbd_watch_asok ${image}) + + mkfifo $(rbd_watch_fifo ${image}) + (cat $(rbd_watch_fifo ${image}) | + rbd --admin-socket ${asok} watch ${image} \ + > $(rbd_watch_out_file ${image}) 2>&1)& + + # find pid of the started rbd watch process + local pid + for i in `seq 10`; do + pid=$(ps auxww | awk "/[r]bd --admin.* watch ${image}/ {print \$2}") + test -n "${pid}" && break + sleep 0.1 + done + test -n "${pid}" + echo ${pid} > $(rbd_watch_pid_file ${image}) + + # find watcher admin socket + test -n "${asok}" + for i in `seq 10`; do + test -S "${asok}" && break + sleep 0.1 + done + test -S "${asok}" + + # configure debug level + ceph --admin-daemon "${asok}" config set debug_rbd 20 + + # check that watcher is registered + rbd status ${image} | expect_false grep "Watchers: none" +} + +function rbd_watch_end() +{ + local image=$1 + local regexp=$2 + + # send 'enter' to watch to exit + echo > $(rbd_watch_fifo ${image}) + # just in case it is not terminated + kill $(cat $(rbd_watch_pid_file ${image})) || : + + # output rbd watch out file for easier troubleshooting + cat $(rbd_watch_out_file ${image}) + + # cleanup + rm -f $(rbd_watch_fifo ${image}) $(rbd_watch_pid_file ${image}) \ + $(rbd_watch_out_file ${image}) $(rbd_watch_asok ${image}) +} + +pool="rbd" +image=testimg$$ +ceph_admin="ceph --admin-daemon $(rbd_watch_asok ${image})" + +rbd create --size 128 ${pool}/${image} + +# check rbd cache commands are present in help output +rbd_cache_flush="rbd cache flush ${pool}/${image}" +rbd_cache_invalidate="rbd cache invalidate ${pool}/${image}" + +rbd_watch_start ${image} +${ceph_admin} help | fgrep "${rbd_cache_flush}" +${ceph_admin} help | fgrep 
"${rbd_cache_invalidate}" +rbd_watch_end ${image} + +# test rbd cache commands with disabled and enabled cache +for conf_rbd_cache in false true; do + + rbd image-meta set ${image} conf_rbd_cache ${conf_rbd_cache} + + rbd_watch_start ${image} + + rbd_check_perfcounter ${image} flush 0 + ${ceph_admin} ${rbd_cache_flush} + # 'flush' counter should increase regardless if cache is enabled + rbd_check_perfcounter ${image} flush 1 + + rbd_check_perfcounter ${image} invalidate_cache 0 + ${ceph_admin} ${rbd_cache_invalidate} + # 'invalidate_cache' counter should increase regardless if cache is enabled + rbd_check_perfcounter ${image} invalidate_cache 1 + + rbd_watch_end ${image} +done + +rbd rm ${image} diff --git a/qa/workunits/rbd/test_librbd.sh b/qa/workunits/rbd/test_librbd.sh new file mode 100755 index 000000000..447306bb4 --- /dev/null +++ b/qa/workunits/rbd/test_librbd.sh @@ -0,0 +1,9 @@ +#!/bin/sh -e + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --error-exitcode=1 ceph_test_librbd +else + ceph_test_librbd +fi +exit 0 diff --git a/qa/workunits/rbd/test_librbd_python.sh b/qa/workunits/rbd/test_librbd_python.sh new file mode 100755 index 000000000..a33100829 --- /dev/null +++ b/qa/workunits/rbd/test_librbd_python.sh @@ -0,0 +1,12 @@ +#!/bin/sh -ex + +relpath=$(dirname $0)/../../../src/test/pybind + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --errors-for-leak-kinds=definite --error-exitcode=1 \ + python3 -m pytest -v $relpath/test_rbd.py "$@" +else + python3 -m pytest -v $relpath/test_rbd.py "$@" +fi +exit 0 diff --git a/qa/workunits/rbd/test_lock_fence.sh b/qa/workunits/rbd/test_lock_fence.sh new file mode 100755 index 000000000..7cf2d21c5 --- /dev/null +++ b/qa/workunits/rbd/test_lock_fence.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# can't use -e because of background process +set -x + +IMAGE=rbdrw-image +LOCKID=rbdrw +RELPATH=$(dirname $0)/../../../src/test/librbd +RBDRW=$RELPATH/rbdrw.py + +rbd create $IMAGE --size 10 --image-format 2 --image-shared || exit 1 + +# rbdrw loops doing I/O to $IMAGE after locking with lockid $LOCKID +python3 $RBDRW $IMAGE $LOCKID & +iochild=$! + +# give client time to lock and start reading/writing +LOCKS='[]' +while [ "$LOCKS" == '[]' ] +do + LOCKS=$(rbd lock list $IMAGE --format json) + sleep 1 +done + +clientaddr=$(rbd lock list $IMAGE | tail -1 | awk '{print $NF;}') +clientid=$(rbd lock list $IMAGE | tail -1 | awk '{print $1;}') +echo "clientaddr: $clientaddr" +echo "clientid: $clientid" + +ceph osd blocklist add $clientaddr || exit 1 + +wait $iochild +rbdrw_exitcode=$? 
+if [ $rbdrw_exitcode != 108 ] +then + echo "wrong exitcode from rbdrw: $rbdrw_exitcode" + exit 1 +else + echo "rbdrw stopped with ESHUTDOWN" +fi + +set -e +ceph osd blocklist rm $clientaddr +rbd lock remove $IMAGE $LOCKID "$clientid" +# rbdrw will have exited with an existing watch, so, until #3527 is fixed, +# hang out until the watch expires +sleep 30 +rbd rm $IMAGE +echo OK diff --git a/qa/workunits/rbd/test_rbd_mirror.sh b/qa/workunits/rbd/test_rbd_mirror.sh new file mode 100755 index 000000000..e139dd7e4 --- /dev/null +++ b/qa/workunits/rbd/test_rbd_mirror.sh @@ -0,0 +1,9 @@ +#!/bin/sh -e + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --error-exitcode=1 ceph_test_rbd_mirror +else + ceph_test_rbd_mirror +fi +exit 0 diff --git a/qa/workunits/rbd/test_rbd_tasks.sh b/qa/workunits/rbd/test_rbd_tasks.sh new file mode 100755 index 000000000..b9663e601 --- /dev/null +++ b/qa/workunits/rbd/test_rbd_tasks.sh @@ -0,0 +1,276 @@ +#!/usr/bin/env bash +set -ex + +POOL=rbd_tasks +POOL_NS=ns1 + +setup() { + trap 'cleanup' INT TERM EXIT + + ceph osd pool create ${POOL} 128 + rbd pool init ${POOL} + rbd namespace create ${POOL}/${POOL_NS} + + TEMPDIR=`mktemp -d` +} + +cleanup() { + ceph osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + + rm -rf ${TEMPDIR} +} + +wait_for() { + local TEST_FN=$1 + shift 1 + local TEST_FN_ARGS=("$@") + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + + ${TEST_FN} "${TEST_FN_ARGS[@]}" || continue + return 0 + done + return 1 +} + +task_exists() { + local TASK_ID=$1 + [[ -z "${TASK_ID}" ]] && exit 1 + + ceph rbd task list ${TASK_ID} || return 1 + return 0 +} + +task_dne() { + local TASK_ID=$1 + [[ -z "${TASK_ID}" ]] && exit 1 + + ceph rbd task list ${TASK_ID} || return 0 + return 1 +} + +task_in_progress() { + local TASK_ID=$1 + [[ -z "${TASK_ID}" ]] && exit 1 + + [[ $(ceph rbd task list ${TASK_ID} | jq '.in_progress') == 'true' ]] +} + +test_remove() { + echo "test_remove" + + local IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${IMAGE} + + # MGR might require some time to discover the OSD map w/ new pool + wait_for ceph rbd task add remove ${POOL}/${IMAGE} +} + +test_flatten() { + echo "test_flatten" + + local PARENT_IMAGE=`uuidgen` + local CHILD_IMAGE=`uuidgen` + + rbd create --size 1 --image-shared ${POOL}/${PARENT_IMAGE} + rbd snap create ${POOL}/${PARENT_IMAGE}@snap + rbd clone ${POOL}/${PARENT_IMAGE}@snap ${POOL}/${POOL_NS}/${CHILD_IMAGE} --rbd-default-clone-format=2 + [[ "$(rbd info --format json ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq 'has("parent")')" == "true" ]] + + local TASK_ID=`ceph rbd task add flatten ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd info --format json ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq 'has("parent")')" == "false" ]] +} + +test_trash_remove() { + echo "test_trash_remove" + + local IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${IMAGE} + local IMAGE_ID=`rbd info --format json ${POOL}/${IMAGE} | jq --raw-output ".id"` + rbd trash mv ${POOL}/${IMAGE} + [[ -n "$(rbd trash list ${POOL})" ]] || exit 1 + + local TASK_ID=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ -z "$(rbd trash list ${POOL})" ]] || exit 1 +} + +test_migration_execute() { + echo "test_migration_execute" + + local SOURCE_IMAGE=`uuidgen` + local TARGET_IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE} + rbd migration prepare 
${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE} + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]] + + local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "executed" ]] +} + +test_migration_commit() { + echo "test_migration_commit" + + local SOURCE_IMAGE=`uuidgen` + local TARGET_IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE} + rbd migration prepare ${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE} + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]] + + local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + TASK_ID=`ceph rbd task add migration commit ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq 'has("migration")')" == "false" ]] + (rbd info ${POOL}/${SOURCE_IMAGE} && return 1) || true + rbd info ${POOL}/${TARGET_IMAGE} +} + +test_migration_abort() { + echo "test_migration_abort" + + local SOURCE_IMAGE=`uuidgen` + local TARGET_IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE} + rbd migration prepare ${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE} + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]] + + local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + TASK_ID=`ceph rbd task add migration abort ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd status --format json ${POOL}/${SOURCE_IMAGE} | jq 'has("migration")')" == "false" ]] + rbd info ${POOL}/${SOURCE_IMAGE} + (rbd info ${POOL}/${TARGET_IMAGE} && return 1) || true +} + +test_list() { + echo "test_list" + + local IMAGE_1=`uuidgen` + local IMAGE_2=`uuidgen` + + rbd create --size 1T --image-shared ${POOL}/${IMAGE_1} + rbd create --size 1T --image-shared ${POOL}/${IMAGE_2} + + local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE_1} | jq --raw-output ".id"` + local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE_2} | jq --raw-output ".id"` + + local LIST_FILE="${TEMPDIR}/list_file" + ceph rbd task list > ${LIST_FILE} + cat ${LIST_FILE} + + [[ $(jq "[.[] | .id] | contains([\"${TASK_ID_1}\", \"${TASK_ID_2}\"])" ${LIST_FILE}) == "true" ]] + + ceph rbd task cancel ${TASK_ID_1} + ceph rbd task cancel ${TASK_ID_2} +} + +test_cancel() { + echo "test_cancel" + + local IMAGE=`uuidgen` + rbd create --size 1T --image-shared ${POOL}/${IMAGE} + local TASK_ID=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + wait_for task_exists ${TASK_ID} + + ceph rbd task cancel ${TASK_ID} + wait_for task_dne ${TASK_ID} +} + +test_duplicate_task() { + echo "test_duplicate_task" + + local IMAGE=`uuidgen` + rbd create --size 1T --image-shared ${POOL}/${IMAGE} + local IMAGE_ID=`rbd info --format json ${POOL}/${IMAGE} | jq --raw-output ".id"` + rbd trash mv ${POOL}/${IMAGE} + + local TASK_ID_1=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"` + local TASK_ID_2=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"` + + [[ "${TASK_ID_1}" == "${TASK_ID_2}" ]] + + ceph rbd task cancel 
${TASK_ID_1} +} + +test_duplicate_name() { + echo "test_duplicate_name" + + local IMAGE=`uuidgen` + rbd create --size 1G --image-shared ${POOL}/${IMAGE} + local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + wait_for task_dne ${TASK_ID_1} + + rbd create --size 1G --image-shared ${POOL}/${IMAGE} + local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + [[ "${TASK_ID_1}" != "${TASK_ID_2}" ]] + wait_for task_dne ${TASK_ID_2} + + local TASK_ID_3=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + [[ "${TASK_ID_2}" == "${TASK_ID_3}" ]] +} + +test_progress() { + echo "test_progress" + + local IMAGE_1=`uuidgen` + local IMAGE_2=`uuidgen` + + rbd create --size 1 --image-shared ${POOL}/${IMAGE_1} + local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE_1} | jq --raw-output ".id"` + + wait_for task_dne ${TASK_ID_1} + + local PROGRESS_FILE="${TEMPDIR}/progress_file" + ceph progress json > ${PROGRESS_FILE} + cat ${PROGRESS_FILE} + + [[ $(jq "[.completed | .[].id] | contains([\"${TASK_ID_1}\"])" ${PROGRESS_FILE}) == "true" ]] + + rbd create --size 1T --image-shared ${POOL}/${IMAGE_2} + local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE_2} | jq --raw-output ".id"` + + wait_for task_in_progress ${TASK_ID_2} + ceph progress json > ${PROGRESS_FILE} + cat ${PROGRESS_FILE} + + [[ $(jq "[.events | .[].id] | contains([\"${TASK_ID_2}\"])" ${PROGRESS_FILE}) == "true" ]] + + ceph rbd task cancel ${TASK_ID_2} + wait_for task_dne ${TASK_ID_2} + + ceph progress json > ${PROGRESS_FILE} + cat ${PROGRESS_FILE} + + [[ $(jq "[.completed | map(select(.failed)) | .[].id] | contains([\"${TASK_ID_2}\"])" ${PROGRESS_FILE}) == "true" ]] +} + +setup +test_remove +test_flatten +test_trash_remove +test_migration_execute +test_migration_commit +test_migration_abort +test_list +test_cancel +test_duplicate_task +test_duplicate_name +test_progress + +echo OK diff --git a/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh b/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh new file mode 100755 index 000000000..501c69cd5 --- /dev/null +++ b/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# +# Regression test for http://tracker.ceph.com/issues/14984 +# +# When the bug is present, starting the rbdmap service causes +# a bogus log message to be emitted to the log because the RBDMAPFILE +# environment variable is not set. +# +# When the bug is not present, starting the rbdmap service will emit +# no log messages, because /etc/ceph/rbdmap does not contain any lines +# that require processing. 
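+#
+# For context, a line in /etc/ceph/rbdmap that *would* require processing looks
+# roughly like this (the pool, image and keyring values are illustrative):
+#
+#   rbd/myimage   id=admin,keyring=/etc/ceph/ceph.client.admin.keyring
+#
+# i.e. an image spec followed by map options; blank lines and comment lines are
+# skipped, so the stock file should produce no log output.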
+# +set -ex + +echo "TEST: save timestamp for use later with journalctl --since" +TIMESTAMP=$(date +%Y-%m-%d\ %H:%M:%S) + +echo "TEST: assert that rbdmap has not logged anything since boot" +journalctl -b 0 -t rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 +journalctl -b 0 -t init-rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 + +echo "TEST: restart the rbdmap.service" +sudo systemctl restart rbdmap.service + +echo "TEST: ensure that /usr/bin/rbdmap runs to completion" +until sudo systemctl status rbdmap.service | grep 'active (exited)' ; do + sleep 0.5 +done + +echo "TEST: assert that rbdmap has not logged anything since TIMESTAMP" +journalctl --since "$TIMESTAMP" -t rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 +journalctl --since "$TIMESTAMP" -t init-rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 + +exit 0 diff --git a/qa/workunits/rbd/verify_pool.sh b/qa/workunits/rbd/verify_pool.sh new file mode 100755 index 000000000..08bcca506 --- /dev/null +++ b/qa/workunits/rbd/verify_pool.sh @@ -0,0 +1,27 @@ +#!/bin/sh -ex + +POOL_NAME=rbd_test_validate_pool +PG_NUM=32 + +tear_down () { + ceph osd pool delete $POOL_NAME $POOL_NAME --yes-i-really-really-mean-it || true +} + +set_up () { + tear_down + ceph osd pool create $POOL_NAME $PG_NUM + ceph osd pool mksnap $POOL_NAME snap + rbd pool init $POOL_NAME +} + +trap tear_down EXIT HUP INT +set_up + +# creating an image in a pool-managed snapshot pool should fail +rbd create --pool $POOL_NAME --size 1 foo && exit 1 || true + +# should succeed if the pool already marked as validated +printf "overwrite validated" | rados --pool $POOL_NAME put rbd_info - +rbd create --pool $POOL_NAME --size 1 foo + +echo OK diff --git a/qa/workunits/rename/all.sh b/qa/workunits/rename/all.sh new file mode 100755 index 000000000..f97ff773f --- /dev/null +++ b/qa/workunits/rename/all.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -ex + +dir=`dirname $0` + +CEPH_TOOL='./ceph' +$CEPH_TOOL || CEPH_TOOL='ceph' + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/prepare.sh + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_nul.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/rem_nul.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_pri.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/rem_pri.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/rem_rem.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_nul.sh +rm -r ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_pri.sh +rm -r ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/dir_pri_pri.sh +rm -r ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/dir_pri_nul.sh +rm -r ./?/* || true + diff --git a/qa/workunits/rename/dir_pri_nul.sh b/qa/workunits/rename/dir_pri_nul.sh new file mode 100755 index 000000000..dd8106b63 --- /dev/null +++ b/qa/workunits/rename/dir_pri_nul.sh @@ -0,0 +1,28 @@ +#!/bin/sh -ex + +# dir: srcdn=destdn +mkdir ./a/dir1 +mv ./a/dir1 ./a/dir1.renamed + +# dir: diff +mkdir ./a/dir2 +mv ./a/dir2 ./b/dir2 + +# dir: diff, child subtree on target +mkdir -p ./a/dir3/child/foo +$CEPH_TOOL mds tell 0 export_dir /a/dir3/child 1 +sleep 5 +mv ./a/dir3 ./b/dir3 + +# dir: diff, child subtree on other +mkdir -p ./a/dir4/child/foo +$CEPH_TOOL mds tell 0 export_dir /a/dir4/child 2 +sleep 5 +mv ./a/dir4 ./b/dir4 + +# dir: witness subtree adjustment +mkdir -p ./a/dir5/1/2/3/4 +$CEPH_TOOL mds tell 0 export_dir /a/dir5/1/2/3 2 +sleep 5 +mv ./a/dir5 ./b + diff --git 
a/qa/workunits/rename/dir_pri_pri.sh b/qa/workunits/rename/dir_pri_pri.sh new file mode 100755 index 000000000..de235fcd3 --- /dev/null +++ b/qa/workunits/rename/dir_pri_pri.sh @@ -0,0 +1,11 @@ +#!/bin/sh -ex + +# dir, srcdn=destdn +mkdir ./a/dir1 +mkdir ./a/dir2 +mv -T ./a/dir1 ./a/dir2 + +# dir, different +mkdir ./a/dir3 +mkdir ./b/dir4 +mv -T ./a/dir3 ./b/dir4 diff --git a/qa/workunits/rename/plan.txt b/qa/workunits/rename/plan.txt new file mode 100644 index 000000000..b423b4140 --- /dev/null +++ b/qa/workunits/rename/plan.txt @@ -0,0 +1,111 @@ +#!/bin/sh + +# srcdn destdn targeti + +## pri auth null auth - +## pri rep null auth - +## rem auth null auth - +## rem rep null auth - + +#/ pri auth null rep - dup of pr_na +#/ pri rep null rep - +#/ rem auth null rep - dup of rr_na +#/ rem rep null rep - + + +## pri auth pri auth - +# pri rep pri auth - +## rem auth pri auth - +# rem rep pri auth - + +# pri auth pri rep - +# pri rep pri rep - +# rem auth pri rep - +# rem rep pri rep - + +## pri auth rem auth auth +# pri rep rem auth auth +## rem auth rem auth auth +# rem rep rem auth auth + +# pri auth rem rep auth +# pri rep rem rep auth +# rem auth rem rep auth +# rem rep rem rep auth + +# pri auth rem auth rep +# pri rep rem auth rep +# rem auth rem auth rep +# rem rep rem auth rep + +# pri auth rem rep rep +# pri rep rem rep rep +# rem auth rem rep rep +# rem rep rem rep rep + + +types of operations + +pri nul + srcdn=destdn + diff + +rem nul + srci=srcdn=destdn + srci=srcdn + srcdn=destdn + srci=destdn + all different + +pri pri + srcdn=destdn + different + +rem pri + srci=srcdn=destdn + srci=srcdn + srcdn=destdn + srci=destdn + all different + +pri rem + srcdn=destdn=desti + srcdn=destdn + destdn=desti + srcdn=desti + all different + +rem rem + srci=srcdn=destdn=desti + srci=srcdn=destdn + srci=srcdn=desti + srci=destdn=desti + srcdni=destdn=desti + srci=srcdn destdn=desti + srci=destdn srcdn=desti + srci=desti srcdn=destdn + srci=srcdn + srci=destdn + srci=desti + srcdn=destdn + srcdn=desti + destdn=desti + all different + + + + + + + + + +p n same +r n same +p n diff +r n diff + +p p same +r p same + +p r diff --git a/qa/workunits/rename/prepare.sh b/qa/workunits/rename/prepare.sh new file mode 100755 index 000000000..b5ba4ae58 --- /dev/null +++ b/qa/workunits/rename/prepare.sh @@ -0,0 +1,21 @@ +#!/bin/sh -ex + +$CEPH_TOOL mds tell 0 injectargs '--mds-bal-interval 0' +$CEPH_TOOL mds tell 1 injectargs '--mds-bal-interval 0' +$CEPH_TOOL mds tell 2 injectargs '--mds-bal-interval 0' +$CEPH_TOOL mds tell 3 injectargs '--mds-bal-interval 0' +#$CEPH_TOOL mds tell 4 injectargs '--mds-bal-interval 0' + +mkdir -p ./a/a +mkdir -p ./b/b +mkdir -p ./c/c +mkdir -p ./d/d + +mount_dir=`df . 
| grep -o " /.*" | grep -o "/.*"` +cur_dir=`pwd` +ceph_dir=${cur_dir##$mount_dir} +$CEPH_TOOL mds tell 0 export_dir $ceph_dir/b 1 +$CEPH_TOOL mds tell 0 export_dir $ceph_dir/c 2 +$CEPH_TOOL mds tell 0 export_dir $ceph_dir/d 3 +sleep 5 + diff --git a/qa/workunits/rename/pri_nul.sh b/qa/workunits/rename/pri_nul.sh new file mode 100755 index 000000000..c40ec1d25 --- /dev/null +++ b/qa/workunits/rename/pri_nul.sh @@ -0,0 +1,11 @@ +#!/bin/sh -ex + +# srcdn=destdn +touch ./a/file1 +mv ./a/file1 ./a/file1.renamed + +# different +touch ./a/file2 +mv ./a/file2 ./b + + diff --git a/qa/workunits/rename/pri_pri.sh b/qa/workunits/rename/pri_pri.sh new file mode 100755 index 000000000..b74985fe3 --- /dev/null +++ b/qa/workunits/rename/pri_pri.sh @@ -0,0 +1,12 @@ +#!/bin/sh -ex + +# srcdn=destdn +touch ./a/file1 +touch ./a/file2 +mv ./a/file1 ./a/file2 + +# different (srcdn != destdn) +touch ./a/file3 +touch ./b/file4 +mv ./a/file3 ./b/file4 + diff --git a/qa/workunits/rename/pri_rem.sh b/qa/workunits/rename/pri_rem.sh new file mode 100755 index 000000000..a1cd03d10 --- /dev/null +++ b/qa/workunits/rename/pri_rem.sh @@ -0,0 +1,31 @@ +#!/bin/sh -ex + +dotest() { + src=$1 + desti=$2 + destdn=$3 + n=$4 + + touch ./$src/src$n + touch ./$desti/desti$n + ln ./$desti/desti$n ./$destdn/destdn$n + + mv ./$src/src$n ./$destdn/destdn$n +} + + +# srcdn=destdn=desti +dotest 'a' 'a' 'a' 1 + +# destdn=desti +dotest 'b' 'a' 'a' 2 + +# srcdn=destdn +dotest 'a' 'b' 'a' 3 + +# srcdn=desti +dotest 'a' 'a' 'b' 4 + +# all different +dotest 'a' 'b' 'c' 5 + diff --git a/qa/workunits/rename/rem_nul.sh b/qa/workunits/rename/rem_nul.sh new file mode 100755 index 000000000..a71033108 --- /dev/null +++ b/qa/workunits/rename/rem_nul.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +dotest() { + srci=$1 + srcdn=$2 + dest=$3 + n=$4 + + touch ./$srci/srci$n + ln ./$srci/srci$n ./$srcdn/srcdn$n + + mv ./$srcdn/srcdn$n ./$dest/dest$n +} + +# srci=srcdn=destdn +dotest 'a' 'a' 'a' 1 + +# srcdn=destdn +dotest 'b' 'a' 'a' 2 + +# srci=destdn +dotest 'a' 'b' 'a' 3 + +# srci=srcdn +dotest 'a' 'a' 'b' 4 + +# all different +dotest 'a' 'b' 'c' 5 + diff --git a/qa/workunits/rename/rem_pri.sh b/qa/workunits/rename/rem_pri.sh new file mode 100755 index 000000000..501ac5e1a --- /dev/null +++ b/qa/workunits/rename/rem_pri.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +dotest() { + srci=$1 + srcdn=$2 + dest=$3 + n=$4 + + touch ./$srci/srci$n + ln ./$srci/srci$n ./$srcdn/srcdn$n + touch ./$dest/dest$n + + mv ./$srcdn/srcdn$n ./$dest/dest$n +} + +# srci=srcdn=destdn +dotest 'a' 'a' 'a' 1 + +# srcdn=destdn +dotest 'b' 'a' 'a' 2 + +# srci=destdn +dotest 'a' 'b' 'a' 3 + +# srci=srcdn +dotest 'a' 'a' 'b' 4 + +# all different +dotest 'a' 'b' 'c' 5 diff --git a/qa/workunits/rename/rem_rem.sh b/qa/workunits/rename/rem_rem.sh new file mode 100755 index 000000000..80028c517 --- /dev/null +++ b/qa/workunits/rename/rem_rem.sh @@ -0,0 +1,61 @@ +#!/bin/sh -ex + +dotest() { + srci=$1 + srcdn=$2 + desti=$3 + destdn=$4 + n=$5 + + touch ./$srci/srci$n + ln ./$srci/srci$n ./$srcdn/srcdn$n + touch ./$desti/desti$n + ln ./$desti/desti$n ./$destdn/destdn$n + + mv ./$srcdn/srcdn$n ./$destdn/destdn$n +} + +# srci=srcdn=destdn=desti +dotest 'a' 'a' 'a' 'a' 1 + +# srcdn=destdn=desti +dotest 'b' 'a' 'a' 'a' 2 + +# srci=destdn=desti +dotest 'a' 'b' 'a' 'a' 3 + +# srci=srcdn=destdn +dotest 'a' 'a' 'b' 'a' 4 + +# srci=srcdn=desti +dotest 'a' 'a' 'a' 'b' 5 + +# srci=srcdn destdn=desti +dotest 'a' 'a' 'b' 'b' 6 + +# srci=destdn srcdn=desti +dotest 'a' 'b' 'b' 'a' 7 + +# srci=desti srcdn=destdn +dotest 'a' 
'b' 'a' 'b' 8 + +# srci=srcdn +dotest 'a' 'a' 'b' 'c' 9 + +# srci=desti +dotest 'a' 'b' 'a' 'c' 10 + +# srci=destdn +dotest 'a' 'b' 'c' 'a' 11 + +# srcdn=desti +dotest 'a' 'b' 'b' 'c' 12 + +# srcdn=destdn +dotest 'a' 'b' 'c' 'b' 13 + +# destdn=desti +dotest 'a' 'b' 'c' 'c' 14 + +# all different +dotest 'a' 'b' 'c' 'd' 15 diff --git a/qa/workunits/rest/test-restful.sh b/qa/workunits/rest/test-restful.sh new file mode 100755 index 000000000..fde0d107a --- /dev/null +++ b/qa/workunits/rest/test-restful.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex + +mydir=`dirname $0` + +secret=`ceph config-key get mgr/restful/keys/admin` +url=$(ceph mgr dump|jq -r .services.restful|sed -e 's/\/$//') +echo "url $url secret $secret" +$mydir/test_mgr_rest_api.py $url $secret + +echo $0 OK diff --git a/qa/workunits/rest/test_mgr_rest_api.py b/qa/workunits/rest/test_mgr_rest_api.py new file mode 100755 index 000000000..74126ab78 --- /dev/null +++ b/qa/workunits/rest/test_mgr_rest_api.py @@ -0,0 +1,98 @@ +#! /usr/bin/env python3 + +import requests +import time +import sys +import json + +# Do not show the stupid message about verify=False. ignore exceptions bc +# this doesn't work on some distros. +try: + from requests.packages.urllib3.exceptions import InsecureRequestWarning + requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning) +except: + pass + +if len(sys.argv) < 3: + print("Usage: %s <url> <admin_key>" % sys.argv[0]) + sys.exit(1) + +addr = sys.argv[1] +auth = ('admin', sys.argv[2]) +headers = {'Content-type': 'application/json'} + +request = None + +# Create a pool and get its id +request = requests.post( + addr + '/pool?wait=yes', + data=json.dumps({'name': 'supertestfriends', 'pg_num': 128}), + headers=headers, + verify=False, + auth=auth) +print(request.text) +request = requests.get(addr + '/pool', verify=False, auth=auth) +assert(request.json()[-1]['pool_name'] == 'supertestfriends') +pool_id = request.json()[-1]['pool'] + +# get a mon name +request = requests.get(addr + '/mon', verify=False, auth=auth) +firstmon = request.json()[0]['name'] +print('first mon is %s' % firstmon) + +# get a server name +request = requests.get(addr + '/osd', verify=False, auth=auth) +aserver = request.json()[0]['server'] +print('a server is %s' % aserver) + + +screenplay = [ + ('get', '/', {}), + ('get', '/config/cluster', {}), + ('get', '/crush/rule', {}), + ('get', '/doc', {}), + ('get', '/mon', {}), + ('get', '/mon/' + firstmon, {}), + ('get', '/osd', {}), + ('get', '/osd/0', {}), + ('get', '/osd/0/command', {}), + ('get', '/pool/1', {}), + ('get', '/server', {}), + ('get', '/server/' + aserver, {}), + ('post', '/osd/0/command', {'command': 'scrub'}), + ('post', '/pool?wait=1', {'name': 'supertestfriends', 'pg_num': 128}), + ('patch', '/osd/0', {'in': False}), + ('patch', '/config/osd', {'pause': True}), + ('get', '/config/osd', {}), + ('patch', '/pool/' + str(pool_id), {'size': 2}), + ('patch', '/config/osd', {'pause': False}), + ('patch', '/osd/0', {'in': True}), + ('get', '/pool', {}), + ('delete', '/pool/' + str(pool_id) + '?wait=1', {}), + ('get', '/request?page=0', {}), + ('delete', '/request', {}), + ('get', '/request', {}), + ('patch', '/pool/1', {'pg_num': 128}), + ('patch', '/pool/1', {'pgp_num': 128}), + ('get', '/perf?daemon=.*', {}), +] + +for method, endpoint, args in screenplay: + if method == 'sleep': + time.sleep(endpoint) + continue + url = addr + endpoint + print("URL = " + url) + request = getattr(requests, method)( + url, + data=json.dumps(args) if args else None, + headers=headers, + 
verify=False, + auth=auth) + assert request is not None + print(request.text) + if request.status_code != 200 or 'error' in request.json(): + print('ERROR: %s request for URL "%s" failed' % (method, url)) + sys.exit(1) + +print('OK') diff --git a/qa/workunits/restart/test-backtraces.py b/qa/workunits/restart/test-backtraces.py new file mode 100755 index 000000000..37ddef539 --- /dev/null +++ b/qa/workunits/restart/test-backtraces.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 + +from __future__ import print_function + +import subprocess +import json +import os +import time +import sys + +import rados as rados +import cephfs as cephfs + +prefix='testbt' + +def get_name(b, i, j): + c = '{pre}.{pid}.{i}.{j}'.format(pre=prefix, pid=os.getpid(), i=i, j=j) + return c, b + '/' + c + +def mkdir(ceph, d): + print("mkdir {d}".format(d=d), file=sys.stderr) + ceph.mkdir(d, 0o755) + return ceph.stat(d)['st_ino'] + +def create(ceph, f): + print("creating {f}".format(f=f), file=sys.stderr) + fd = ceph.open(f, os.O_CREAT | os.O_RDWR, 0o644) + ceph.close(fd) + return ceph.stat(f)['st_ino'] + +def set_mds_config_param(ceph, param): + with open('/dev/null', 'rb') as devnull: + confarg = '' + if conf != '': + confarg = '-c {c}'.format(c=conf) + r = subprocess.call("ceph {ca} mds tell a injectargs '{p}'".format(ca=confarg, p=param), shell=True, stdout=devnull) + if r != 0: + raise Exception + + +class _TrimIndentFile(object): + def __init__(self, fp): + self.fp = fp + + def readline(self): + line = self.fp.readline() + return line.lstrip(' \t') + +def _optionxform(s): + s = s.replace('_', ' ') + s = '_'.join(s.split()) + return s + +def conf_set_kill_mds(location, killnum): + print('setting mds kill config option for {l}.{k}'.format(l=location, k=killnum), file=sys.stderr) + print("restart mds a mds_kill_{l}_at {k}".format(l=location, k=killnum)) + sys.stdout.flush() + for l in sys.stdin.readline(): + if l == 'restarted': + break + +def flush(ceph, testnum): + print('flushing {t}'.format(t=testnum), file=sys.stderr) + set_mds_config_param(ceph, '--mds_log_max_segments 1') + + for i in range(1, 500): + f = '{p}.{pid}.{t}.{i}'.format(p=prefix, pid=os.getpid(), t=testnum, i=i) + print('flushing with create {f}'.format(f=f), file=sys.stderr) + fd = ceph.open(f, os.O_CREAT | os.O_RDWR, 0o644) + ceph.close(fd) + ceph.unlink(f) + + print('flush doing shutdown', file=sys.stderr) + ceph.shutdown() + print('flush reinitializing ceph', file=sys.stderr) + ceph = cephfs.LibCephFS(conffile=conf) + print('flush doing mount', file=sys.stderr) + ceph.mount() + return ceph + +def kill_mds(ceph, location, killnum): + print('killing mds: {l}.{k}'.format(l=location, k=killnum), file=sys.stderr) + set_mds_config_param(ceph, '--mds_kill_{l}_at {k}'.format(l=location, k=killnum)) + +def wait_for_mds(ceph): + # wait for restart + while True: + confarg = '' + if conf != '': + confarg = '-c {c}'.format(c=conf) + r = subprocess.check_output("ceph {ca} mds stat".format(ca=confarg), shell=True).decode() + if r.find('a=up:active'): + break + time.sleep(1) + +def decode(value): + + tmpfile = '/tmp/{p}.{pid}'.format(p=prefix, pid=os.getpid()) + with open(tmpfile, 'w+') as f: + f.write(value) + + p = subprocess.Popen( + [ + 'ceph-dencoder', + 'import', + tmpfile, + 'type', + 'inode_backtrace_t', + 'decode', + 'dump_json', + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + (stdout, _) = p.communicate(input=value) + p.stdin.close() + if p.returncode != 0: + raise Exception + os.remove(tmpfile) + return json.loads(stdout) + +class 
VerifyFailure(Exception): + pass + +def verify(rados_ioctx, ino, values, pool): + print('getting parent attr for ino: %lx.00000000' % ino, file=sys.stderr) + savede = None + for i in range(1, 20): + try: + savede = None + binbt = rados_ioctx.get_xattr('%lx.00000000' % ino, 'parent') + except rados.ObjectNotFound as e: + # wait for a bit to let segments get flushed out + savede = e + time.sleep(10) + if savede: + raise savede + + bt = decode(binbt) + + ind = 0 + if bt['ino'] != ino: + raise VerifyFailure('inode mismatch: {bi} != {ino}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + bi=bt['ancestors'][ind]['dname'], ino=ino, bt=bt, i=ino, v=values)) + for (n, i) in values: + if bt['ancestors'][ind]['dirino'] != i: + raise VerifyFailure('ancestor dirino mismatch: {b} != {ind}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + b=bt['ancestors'][ind]['dirino'], ind=i, bt=bt, i=ino, v=values)) + if bt['ancestors'][ind]['dname'] != n: + raise VerifyFailure('ancestor dname mismatch: {b} != {n}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + b=bt['ancestors'][ind]['dname'], n=n, bt=bt, i=ino, v=values)) + ind += 1 + + if bt['pool'] != pool: + raise VerifyFailure('pool mismatch: {btp} != {p}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + btp=bt['pool'], p=pool, bt=bt, i=ino, v=values)) + +def make_abc(ceph, rooti, i): + expected_bt = [] + c, d = get_name("/", i, 0) + expected_bt = [(c, rooti)] + expected_bt + di = mkdir(ceph, d) + c, d = get_name(d, i, 1) + expected_bt = [(c, di)] + expected_bt + di = mkdir(ceph, d) + c, f = get_name(d, i, 2) + fi = create(ceph, f) + expected_bt = [(c, di)] + expected_bt + return fi, expected_bt + +test = -1 +if len(sys.argv) > 1: + test = int(sys.argv[1]) + +conf = '' +if len(sys.argv) > 2: + conf = sys.argv[2] + +radosobj = rados.Rados(conffile=conf) +radosobj.connect() +ioctx = radosobj.open_ioctx('data') + +ceph = cephfs.LibCephFS(conffile=conf) +ceph.mount() + +rooti = ceph.stat('/')['st_ino'] + +test = -1 +if len(sys.argv) > 1: + test = int(sys.argv[1]) + +conf = '/etc/ceph/ceph.conf' +if len(sys.argv) > 2: + conf = sys.argv[2] + +# create /a/b/c +# flush +# verify + +i = 0 +if test < 0 or test == i: + print('Running test %d: basic verify' % i, file=sys.stderr) + ino, expected_bt = make_abc(ceph, rooti, i) + ceph = flush(ceph, i) + verify(ioctx, ino, expected_bt, 0) + +i += 1 + +# kill-mds-at-openc-1 +# create /a/b/c +# restart-mds +# flush +# verify + +if test < 0 or test == i: + print('Running test %d: kill openc' % i, file=sys.stderr) + print("restart mds a") + sys.stdout.flush() + kill_mds(ceph, 'openc', 1) + ino, expected_bt = make_abc(ceph, rooti, i) + ceph = flush(ceph, i) + verify(ioctx, ino, expected_bt, 0) + +i += 1 + +# kill-mds-at-openc-1 +# create /a/b/c +# restart-mds with kill-mds-at-replay-1 +# restart-mds +# flush +# verify +if test < 0 or test == i: + print('Running test %d: kill openc/replay' % i, file=sys.stderr) + # these are reversed because we want to prepare the config + conf_set_kill_mds('journal_replay', 1) + kill_mds(ceph, 'openc', 1) + print("restart mds a") + sys.stdout.flush() + ino, expected_bt = make_abc(ceph, rooti, i) + ceph = flush(ceph, i) + verify(ioctx, ino, expected_bt, 0) + +i += 1 + +ioctx.close() +radosobj.shutdown() +ceph.shutdown() + +print("done") +sys.stdout.flush() diff --git a/qa/workunits/rgw/common.py b/qa/workunits/rgw/common.py new file mode 100755 index 000000000..2c9c5d035 --- /dev/null +++ 
b/qa/workunits/rgw/common.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +import errno +import subprocess +import logging as log +import boto3 +import botocore.exceptions +import random +import json +from time import sleep + +log.basicConfig(format = '%(message)s', level=log.DEBUG) +log.getLogger('botocore').setLevel(log.CRITICAL) +log.getLogger('boto3').setLevel(log.CRITICAL) +log.getLogger('urllib3').setLevel(log.CRITICAL) + +def exec_cmd(cmd, wait = True, **kwargs): + check_retcode = kwargs.pop('check_retcode', True) + kwargs['shell'] = True + kwargs['stdout'] = subprocess.PIPE + proc = subprocess.Popen(cmd, **kwargs) + log.info(proc.args) + if wait: + out, _ = proc.communicate() + if check_retcode: + assert(proc.returncode == 0) + return out + return (out, proc.returncode) + return '' + +def create_user(uid, display_name, access_key, secret_key): + _, ret = exec_cmd(f'radosgw-admin user create --uid {uid} --display-name "{display_name}" --access-key {access_key} --secret {secret_key}', check_retcode=False) + assert(ret == 0 or errno.EEXIST) + +def boto_connect(access_key, secret_key, config=None): + def try_connect(portnum, ssl, proto): + endpoint = proto + '://localhost:' + portnum + conn = boto3.resource('s3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + use_ssl=ssl, + endpoint_url=endpoint, + verify=False, + config=config, + ) + try: + list(conn.buckets.limit(1)) # just verify we can list buckets + except botocore.exceptions.ConnectionError as e: + print(e) + raise + print('connected to', endpoint) + return conn + try: + return try_connect('80', False, 'http') + except botocore.exceptions.ConnectionError: + try: # retry on non-privileged http port + return try_connect('8000', False, 'http') + except botocore.exceptions.ConnectionError: + # retry with ssl + return try_connect('443', True, 'https') + +def put_objects(bucket, key_list): + objs = [] + for key in key_list: + o = bucket.put_object(Key=key, Body=b"some_data") + objs.append((o.key, o.version_id)) + return objs + +def create_unlinked_objects(conn, bucket, key_list): + # creates an unlinked/unlistable object for each key in key_list + + object_versions = [] + try: + exec_cmd('ceph config set client rgw_debug_inject_set_olh_err 2') + exec_cmd('ceph config set client rgw_debug_inject_olh_cancel_modification_err true') + sleep(1) + for key in key_list: + tag = str(random.randint(0, 1_000_000)) + try: + bucket.put_object(Key=key, Body=b"some_data", Metadata = { + 'tag': tag, + }) + except Exception as e: + log.debug(e) + out = exec_cmd(f'radosgw-admin bi list --bucket {bucket.name} --object {key}') + instance_entries = filter( + lambda x: x['type'] == 'instance', + json.loads(out.replace(b'\x80', b'0x80'))) + found = False + for ie in instance_entries: + instance_id = ie['entry']['instance'] + ov = conn.ObjectVersion(bucket.name, key, instance_id).head() + if ov['Metadata'] and ov['Metadata']['tag'] == tag: + object_versions.append((key, instance_id)) + found = True + break + if not found: + raise Exception(f'failed to create unlinked object for key={key}') + finally: + exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') + exec_cmd('ceph config rm client rgw_debug_inject_olh_cancel_modification_err') + return object_versions + diff --git a/qa/workunits/rgw/keystone-fake-server.py b/qa/workunits/rgw/keystone-fake-server.py new file mode 100755 index 000000000..c05ad7bfd --- /dev/null +++ b/qa/workunits/rgw/keystone-fake-server.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2022 
Binero +# +# Author: Tobias Urdin <tobias.urdin@binero.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. + +from datetime import datetime, timedelta +import logging +import json +from http.server import BaseHTTPRequestHandler, HTTPServer + + +DEFAULT_DOMAIN = { + 'id': 'default', + 'name': 'Default', +} + + +PROJECTS = { + 'admin': { + 'domain': DEFAULT_DOMAIN, + 'id': 'a6944d763bf64ee6a275f1263fae0352', + 'name': 'admin', + }, + 'deadbeef': { + 'domain': DEFAULT_DOMAIN, + 'id': 'b4221c214dd64ee6a464g2153fae3813', + 'name': 'deadbeef', + }, +} + + +USERS = { + 'admin': { + 'domain': DEFAULT_DOMAIN, + 'id': '51cc68287d524c759f47c811e6463340', + 'name': 'admin', + }, + 'deadbeef': { + 'domain': DEFAULT_DOMAIN, + 'id': '99gg485738df758349jf8d848g774392', + 'name': 'deadbeef', + }, +} + + +USERROLES = { + 'admin': [ + { + 'id': '51cc68287d524c759f47c811e6463340', + 'name': 'admin', + } + ], + 'deadbeef': [ + { + 'id': '98bd32184f854f393a72b932g5334124', + 'name': 'Member', + } + ], +} + + +TOKENS = { + 'admin-token-1': { + 'username': 'admin', + 'project': 'admin', + 'expired': False, + }, + 'user-token-1': { + 'username': 'deadbeef', + 'project': 'deadbeef', + 'expired': False, + }, + 'user-token-2': { + 'username': 'deadbeef', + 'project': 'deadbeef', + 'expired': True, + }, +} + + +def _generate_token_result(username, project, expired=False): + userdata = USERS[username] + projectdata = PROJECTS[project] + userroles = USERROLES[username] + + if expired: + then = datetime.now() - timedelta(hours=2) + issued_at = then.strftime('%Y-%m-%dT%H:%M:%SZ') + expires_at = (then + timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%SZ') + else: + now = datetime.now() + issued_at = now.strftime('%Y-%m-%dT%H:%M:%SZ') + expires_at = (now + timedelta(seconds=10)).strftime('%Y-%m-%dT%H:%M:%SZ') + + result = { + 'token': { + 'audit_ids': ['3T2dc1CGQxyJsHdDu1xkcw'], + 'catalog': [], + 'expires_at': expires_at, + 'is_domain': False, + 'issued_at': issued_at, + 'methods': ['password'], + 'project': projectdata, + 'roles': userroles, + 'user': userdata, + } + } + + return result + + +COUNTERS = { + 'get_total': 0, + 'post_total': 0, +} + + +class HTTPRequestHandler(BaseHTTPRequestHandler): + def do_GET(self): + # This is not part of the Keystone API + if self.path == '/stats': + self._handle_stats() + return + + if str(self.path).startswith('/v3/auth/tokens'): + self._handle_get_auth() + else: + self.send_response(403) + self.end_headers() + + def do_POST(self): + if self.path == '/v3/auth/tokens': + self._handle_post_auth() + else: + self.send_response(400) + self.end_headers() + + def _get_data(self): + length = int(self.headers.get('content-length')) + data = self.rfile.read(length).decode('utf8') + return json.loads(data) + + def _set_data(self, data): + jdata = json.dumps(data) + self.wfile.write(jdata.encode('utf8')) + + def _handle_stats(self): + self.send_response(200) + self.end_headers() + self._set_data(COUNTERS) + + def _handle_get_auth(self): + logging.info('Increasing get_total counter from %d -> %d' % (COUNTERS['get_total'], COUNTERS['get_total']+1)) + 
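+        # Each validation request bumps get_total (reported by the /stats
+        # endpoint) before the Keystone-style checks below: the token to
+        # validate arrives in the X-Subject-Token header, and an expired token
+        # is only served when the request carries ?allow_expired=1, roughly
+        # mirroring the real v3 API.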
COUNTERS['get_total'] += 1 + auth_token = self.headers.get('X-Subject-Token', None) + if auth_token and auth_token in TOKENS: + tokendata = TOKENS[auth_token] + if tokendata['expired'] and 'allow_expired=1' not in self.path: + self.send_response(404) + self.end_headers() + else: + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + result = _generate_token_result(tokendata['username'], tokendata['project'], tokendata['expired']) + self._set_data(result) + else: + self.send_response(404) + self.end_headers() + + def _handle_post_auth(self): + logging.info('Increasing post_total counter from %d -> %d' % (COUNTERS['post_total'], COUNTERS['post_total']+1)) + COUNTERS['post_total'] += 1 + data = self._get_data() + user = data['auth']['identity']['password']['user'] + if user['name'] == 'admin' and user['password'] == 'ADMIN': + self.send_response(201) + self.send_header('Content-Type', 'application/json') + self.send_header('X-Subject-Token', 'admin-token-1') + self.end_headers() + tokendata = TOKENS['admin-token-1'] + result = _generate_token_result(tokendata['username'], tokendata['project'], tokendata['expired']) + self._set_data(result) + else: + self.send_response(401) + self.end_headers() + + +def main(): + logging.basicConfig(level=logging.DEBUG) + logging.info('Starting keystone-fake-server') + server = HTTPServer(('localhost', 5000), HTTPRequestHandler) + server.serve_forever() + + +if __name__ == '__main__': + main() diff --git a/qa/workunits/rgw/keystone-service-token.sh b/qa/workunits/rgw/keystone-service-token.sh new file mode 100755 index 000000000..fc39731ca --- /dev/null +++ b/qa/workunits/rgw/keystone-service-token.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Binero +# +# Author: Tobias Urdin <tobias.urdin@binero.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +trap cleanup EXIT + +function cleanup() { + kill $KEYSTONE_FAKE_SERVER_PID + wait +} + +function run() { + $CEPH_ROOT/qa/workunits/rgw//keystone-fake-server.py & + KEYSTONE_FAKE_SERVER_PID=$! + # Give fake Keystone server some seconds to startup + sleep 5 + $CEPH_ROOT/qa/workunits/rgw/test-keystone-service-token.py +} + +main keystone-service-token "$@" diff --git a/qa/workunits/rgw/olh_noname_key b/qa/workunits/rgw/olh_noname_key new file mode 100644 index 000000000..6138c57cd --- /dev/null +++ b/qa/workunits/rgw/olh_noname_key @@ -0,0 +1 @@ +€1001_04/57/0457f727ec113e418d5b16d206b200ed068c0533554883ce811df7c932a3df68/2018_12_11/2889999/3386469/metadata.gz
\ No newline at end of file diff --git a/qa/workunits/rgw/olh_noname_val b/qa/workunits/rgw/olh_noname_val Binary files differnew file mode 100644 index 000000000..ff442e137 --- /dev/null +++ b/qa/workunits/rgw/olh_noname_val diff --git a/qa/workunits/rgw/run-bucket-check.sh b/qa/workunits/rgw/run-bucket-check.sh new file mode 100755 index 000000000..85e02db5e --- /dev/null +++ b/qa/workunits/rgw/run-bucket-check.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -ex + +# assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + +## run test +$mydir/bin/python3 $mydir/test_rgw_bucket_check.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/run-datacache.sh b/qa/workunits/rgw/run-datacache.sh new file mode 100755 index 000000000..5c00da1da --- /dev/null +++ b/qa/workunits/rgw/run-datacache.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -ex + +#assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install configobj + +## run test +$mydir/bin/python3 $mydir/test_rgw_datacache.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/run-reshard.sh b/qa/workunits/rgw/run-reshard.sh new file mode 100755 index 000000000..bdab0aabb --- /dev/null +++ b/qa/workunits/rgw/run-reshard.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -ex + +# this test uses fault injection to abort during 'radosgw-admin bucket reshard' +# disable coredumps so teuthology won't mark a failure +ulimit -c 0 + +#assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + +## run test +$mydir/bin/python3 $mydir/test_rgw_reshard.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/run-s3tests.sh b/qa/workunits/rgw/run-s3tests.sh new file mode 100755 index 000000000..727bef9eb --- /dev/null +++ b/qa/workunits/rgw/run-s3tests.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -ex + +# run s3-tests from current directory. assume working +# ceph environment (radosgw-admin in path) and rgw on localhost:8000 +# (the vstart default). + +branch=$1 +[ -z "$1" ] && branch=master +port=$2 +[ -z "$2" ] && port=8000 # this is vstart's default + +## + +[ -z "$BUILD_DIR" ] && BUILD_DIR=build + +if [ -e CMakeCache.txt ]; then + BIN_PATH=$PWD/bin +elif [ -e $root_path/../${BUILD_DIR}/CMakeCache.txt ]; then + cd $root_path/../${BUILD_DIR} + BIN_PATH=$PWD/bin +fi +PATH=$PATH:$BIN_PATH + +dir=tmp.s3-tests.$$ + +# clone and bootstrap +mkdir $dir +cd $dir +git clone https://github.com/ceph/s3-tests +cd s3-tests +git checkout ceph-$branch +S3TEST_CONF=s3tests.conf.SAMPLE tox -- -m "not fails_on_rgw and not sse_s3 and not lifecycle_expiration and not test_of_sts and not webidentity_test" -v + +cd ../.. +rm -rf $dir + +echo OK. 
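The run-*.sh wrappers above each create a throwaway virtualenv, install boto3 (or configobj), and invoke the matching test_rgw_*.py module, which in turn builds on the helpers in qa/workunits/rgw/common.py. A minimal sketch of that flow, assuming common.py is importable, radosgw-admin is in PATH, and an RGW instance is listening on one of the ports probed by boto_connect (80, 8000, or 443); the user name and bucket name here are placeholders, not part of the suite:

    #!/usr/bin/env python3
    # Hypothetical driver sketch mirroring how the test_rgw_*.py scripts
    # use the shared helpers defined in common.py above.
    from common import boto_connect, create_user, put_objects

    ACCESS_KEY = 'XXXXXXXXXXXXXXXXXXXX'                       # placeholder credentials
    SECRET_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'

    # create_user() shells out to 'radosgw-admin user create' and tolerates
    # an already-existing user
    create_user('sketch-user', 'Sketch User', ACCESS_KEY, SECRET_KEY)

    # boto_connect() probes http://localhost:80, then :8000, then https://localhost:443
    conn = boto_connect(ACCESS_KEY, SECRET_KEY)
    bucket = conn.create_bucket(Bucket='sketch-bucket')

    # put_objects() returns (key, version_id) pairs for later verification
    print(put_objects(bucket, ['a', 'b', 'c']))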
+ diff --git a/qa/workunits/rgw/run-versioning.sh b/qa/workunits/rgw/run-versioning.sh new file mode 100755 index 000000000..df60b7b03 --- /dev/null +++ b/qa/workunits/rgw/run-versioning.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -ex + +# assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + +## run test +$mydir/bin/python3 $mydir/test_rgw_versioning.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/s3_bucket_quota.pl b/qa/workunits/rgw/s3_bucket_quota.pl new file mode 100755 index 000000000..7f5476ef6 --- /dev/null +++ b/qa/workunits/rgw/s3_bucket_quota.pl @@ -0,0 +1,393 @@ +#! /usr/bin/perl + +=head1 NAME + +s3_bucket_quota.pl - Script to test the rgw bucket quota functionality using s3 interface. + +=head1 SYNOPSIS + +Use: + perl s3_bucket_quota.pl [--help] + +Examples: + perl s3_bucket_quota.pl + or + perl s3_bucket_quota.pl --help + +=head1 DESCRIPTION + +This script intends to test the rgw bucket quota funcionality using s3 interface +and reports the test results + +=head1 ARGUMENTS + +s3_bucket_quota.pl takes the following arguments: + --help + (optional) Displays the usage message. + +=cut + +use Amazon::S3; +use Data::Dumper; +#use strict; +use IO::File; +use Getopt::Long; +use Digest::MD5; +use Pod::Usage(); +use FindBin; +use lib $FindBin::Bin; +use s3_utilities; +use Net::Domain qw(hostfqdn); + +my $help; + +Getopt::Long::GetOptions( + 'help' => \$help +); +Pod::Usage::pod2usage(-verbose => 1) && exit if ($help); + +#== local variables === +our $mytestfilename; +my $mytestfilename1; +my $logmsg; +my $kruft; +my $s3; +my $hostdom = $ENV{RGW_FQDN}||hostfqdn(); +my $port = $ENV{RGW_PORT}||80; +our $hostname = "$hostdom:$port"; +our $testfileloc; +my $rgw_user = "qa_user"; + +# Function that deletes the user $rgw_user and write to logfile. 
+sub delete_user +{ + my $cmd = "$radosgw_admin user rm --uid=$rgw_user"; + my $cmd_op = get_command_output($cmd); + if ($cmd_op !~ /aborting/){ + print "user $rgw_user deleted\n"; + } else { + print "user $rgw_user NOT deleted\n"; + return 1; + } + return 0; +} + +sub quota_set_max_size { + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-size=1048576000`; + if ($set_quota !~ /./){ + print "quota set for the bucket: $bucketname \n"; + } else { + print "quota set failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_set_max_size_zero { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-size=0`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname with max size as zero\n"); + } else { + fail ("quota set with max size 0 failed for the bucket: $bucketname \n"); + } + delete_bucket(); +} + +sub quota_set_max_objs_zero { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-objects=0`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname with max objects as zero\n"); + } else { + fail ("quota set with max objects 0 failed for the bucket: $bucketname \n"); + } + delete_bucket(); +} + +sub quota_set_neg_size { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-size=-1`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname with max size -1\n"); + } else { + fail ("quota set failed for the bucket: $bucketname with max size -1 \n"); + } + delete_bucket(); +} + +sub quota_set_neg_objs { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-objects=-1`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname max objects -1 \n"); + } else { + fail ("quota set failed for the bucket: $bucketname \n with max objects -1"); + } + delete_bucket(); +} + +sub quota_set_user_objs { + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=bucket`; + my $set_quota1 = `$radosgw_admin quota set --bucket=$bucketname --max-objects=1`; + if ($set_quota1 !~ /./){ + print "bucket quota max_objs set for the given user: $bucketname \n"; + } else { + print "bucket quota max_objs set failed for the given user: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_set_user_size { + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=bucket`; + my $set_quota1 = `$radosgw_admin quota set --bucket=$bucketname --max-size=1048576000`; + if ($set_quota1 !~ /./){ + print "bucket quota max size set for the given user: $bucketname \n"; + } else { + print "bucket quota max size set failed for the user: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_set_max_obj { + # set max objects + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-objects=1`; + if ($set_quota !~ /./){ + print "quota set for the bucket: $bucketname \n"; + } else { + print "quota set failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_enable { + my $en_quota = `$radosgw_admin quota enable --bucket=$bucketname`; + if ($en_quota !~ /./){ + print "quota enabled for the bucket: $bucketname \n"; + } else { + print "quota enable failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_disable { + my $dis_quota = `$radosgw_admin quota disable --bucket=$bucketname`; + if ($dis_quota !~ /./){ + print "quota disabled for the bucket: $bucketname \n"; + } else { + print "quota 
disable failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +# upload a file to the bucket +sub upload_file { + print "adding file to bucket: $mytestfilename\n"; + ($bucket->add_key_filename( $mytestfilename, $testfileloc, + { content_type => 'text/plain', }, + ) and (print "upload file successful\n" ) and return 0 ) or (return 1); +} + +# delete the bucket +sub delete_bucket { + #($bucket->delete_key($mytestfilename1) and print "delete keys on bucket succeeded second time\n" ) or die $s3->err . "delete keys on bucket failed second time\n" . $s3->errstr; + ($bucket->delete_bucket) and (print "bucket delete succeeded \n") or die $s3->err . "delete bucket failed\n" . $s3->errstr; +} + +# set bucket quota with max_objects and verify +sub test_max_objects { + my $size = '10Mb'; + create_file($size); + run_s3($rgw_user); + quota_set_max_obj(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 0){ + pass ( "Test max objects passed" ); + } else { + fail ( "Test max objects failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# Set bucket quota for specific user and ensure max objects set for the user is validated +sub test_max_objects_per_user{ + my $size = '10Mb'; + create_file($size); + run_s3($rgw_user); + quota_set_user_objs(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 0){ + pass ( "Test max objects for the given user passed" ); + } else { + fail ( "Test max objects for the given user failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota with max_objects and try to exceed the max_objects and verify +sub test_beyond_max_objs { + my $size = "10Mb"; + create_file($size); + run_s3($rgw_user); + quota_set_max_obj(); + quota_enable(); + upload_file(); + my $ret_value = readd_file(); + if ($ret_value == 1){ + pass ( "set max objects and test beyond max objects passed" ); + } else { + fail ( "set max objects and test beyond max objects failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota for a user with max_objects and try to exceed the max_objects and verify +sub test_beyond_max_objs_user { + my $size = "10Mb"; + create_file($size); + run_s3($rgw_user); + quota_set_user_objs(); + quota_enable(); + upload_file(); + my $ret_value = readd_file(); + if ($ret_value == 1){ + pass ( "set max objects for a given user and test beyond max objects passed" ); + } else { + fail ( "set max objects for a given user and test beyond max objects failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota for max size and ensure it is validated +sub test_quota_size { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_max_size(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 1) { + pass ( "set max size and ensure that objects upload beyond max size is not entertained" ); + my $retdel = delete_keys($mytestfilename); + if ($retdel == 0) { + print "delete objects successful \n"; + my $size1 = "1Gb"; + create_file($size1); + my $ret_val1 = upload_file(); + if ($ret_val1 == 0) { + pass ( "set max size and ensure that the max size is in effect" ); + } else { + fail ( "set max size and ensure the max size takes effect" ); + } + } + } else { + fail ( "set max size and ensure that objects beyond max size is not allowed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set 
bucket quota for max size for a given user and ensure it is validated +sub test_quota_size_user { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_user_size(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 1) { + pass ( "set max size for a given user and ensure that objects upload beyond max size is not entertained" ); + my $retdel = delete_keys($mytestfilename); + if ($retdel == 0) { + print "delete objects successful \n"; + my $size1 = "1Gb"; + create_file($size1); + my $ret_val1 = upload_file(); + if ($ret_val1 == 0) { + pass ( "set max size for a given user and ensure that the max size is in effect" ); + } else { + fail ( "set max size for a given user and ensure the max size takes effect" ); + } + } + } else { + fail ( "set max size for a given user and ensure that objects beyond max size is not allowed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota size but disable quota and verify +sub test_quota_size_disabled { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_max_size(); + quota_disable(); + my $ret_value = upload_file(); + if ($ret_value == 0) { + pass ( "bucket quota size doesnt take effect when quota is disabled" ); + } else { + fail ( "bucket quota size doesnt take effect when quota is disabled" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota size for a given user but disable quota and verify +sub test_quota_size_disabled_user { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_user_size(); + quota_disable(); + my $ret_value = upload_file(); + if ($ret_value == 0) { + pass ( "bucket quota size for a given user doesnt take effect when quota is disabled" ); + } else { + fail ( "bucket quota size for a given user doesnt take effect when quota is disabled" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota for specified user and verify + +#== Main starts here=== +ceph_os_info(); +test_max_objects(); +test_max_objects_per_user(); +test_beyond_max_objs(); +test_beyond_max_objs_user(); +quota_set_max_size_zero(); +quota_set_max_objs_zero(); +quota_set_neg_objs(); +quota_set_neg_size(); +test_quota_size(); +test_quota_size_user(); +test_quota_size_disabled(); +test_quota_size_disabled_user(); + +print "OK"; diff --git a/qa/workunits/rgw/s3_multipart_upload.pl b/qa/workunits/rgw/s3_multipart_upload.pl new file mode 100755 index 000000000..ab29e6b03 --- /dev/null +++ b/qa/workunits/rgw/s3_multipart_upload.pl @@ -0,0 +1,151 @@ +#! /usr/bin/perl + +=head1 NAME + +s3_multipart_upload.pl - Script to test rgw multipart upload using s3 interface. + +=head1 SYNOPSIS + +Use: + perl s3_multipart_upload.pl [--help] + +Examples: + perl s3_multipart_upload.pl + or + perl s3_multipart_upload.pl --help + +=head1 DESCRIPTION + +This script intends to test the rgw multipart upload followed by a download +and verify checksum using s3 interface and reports test results + +=head1 ARGUMENTS + +s3_multipart_upload.pl takes the following arguments: + --help + (optional) Displays the usage message. 
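s3_bucket_quota.pl above exercises bucket quotas purely by shelling out to radosgw-admin and then uploading through Amazon::S3. The same command sequence it drives, sketched with Python's subprocess module; the bucket name is illustrative only, and radosgw-admin is assumed to be in PATH:

    #!/usr/bin/env python3
    # Sketch of the radosgw-admin calls made by the bucket quota tests above.
    import subprocess

    BUCKET = 'quota-demo-bucket'  # placeholder; the Perl tests generate bucket names

    def admin(*args):
        subprocess.run(['radosgw-admin', *args], check=True)

    # cap the bucket at one object, then enable quota enforcement
    admin('quota', 'set', f'--bucket={BUCKET}', '--max-objects=1')
    admin('quota', 'enable', f'--bucket={BUCKET}')
    # a second upload should now be rejected; the Perl test checks this via readd_file()

    # per-user bucket-scope variant used by quota_set_user_objs()
    admin('quota', 'set', '--uid=qa_user', '--quota-scope=bucket')

    # and the disabled-quota case checked by test_quota_size_disabled()
    admin('quota', 'disable', f'--bucket={BUCKET}')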
+ +=cut + +use Amazon::S3; +use Data::Dumper; +use IO::File; +use Getopt::Long; +use Digest::MD5; +use Pod::Usage(); +use FindBin; +use lib $FindBin::Bin; +use s3_utilities; +use Net::Domain qw(hostfqdn); + +my $help; + +Getopt::Long::GetOptions( + 'help' => \$help +); +Pod::Usage::pod2usage(-verbose => 1) && exit if ($help); + +#== local variables === +my $s3; +my $hostdom = $ENV{RGW_FQDN}||hostfqdn(); +my $port = $ENV{RGW_PORT}||80; +our $hostname = "$hostdom:$port"; +our $testfileloc; +our $mytestfilename; + +# upload a file to the bucket +sub upload_file { + my ($fsize, $i) = @_; + create_file($fsize, $i); + print "adding file to bucket $bucketname: $mytestfilename\n"; + ($bucket->add_key_filename( $mytestfilename, $testfileloc, + { content_type => 'text/plain', }, + ) and (print "upload file successful\n" ) and return 0 ) or (print "upload failed\n" and return 1); +} + +# delete the bucket +sub delete_bucket { + ($bucket->delete_bucket) and (print "bucket delete succeeded \n") or die $s3->err . "delete bucket failed\n" . $s3->errstr; +} + +# Function to perform multipart upload of given file size to the user bucket via s3 interface +sub multipart_upload +{ + my ($size, $parts) = @_; + # generate random user every time + my $user = rand(); + # Divide the file size in to equal parts and upload to bucket in multiple parts + my $fsize = ($size/$parts); + my $fsize1; + run_s3($user); + if ($parts == 10){ + $fsize1 = '100Mb'; + } elsif ($parts == 100){ + $fsize1 = '10Mb'; + } + foreach my $i(1..$parts){ + print "uploading file - part $i \n"; + upload_file($fsize1, $i); + } + fetch_file_from_bucket($fsize1, $parts); + compare_cksum($fsize1, $parts); + purge_data($user); +} + +# Function to download the files from bucket to verify there is no data corruption +sub fetch_file_from_bucket +{ + # fetch file from the bucket + my ($fsize, $parts) = @_; + foreach my $i(1..$parts){ + my $src_file = "$fsize.$i"; + my $dest_file = "/tmp/downloadfile.$i"; + print + "Downloading $src_file from bucket to $dest_file \n"; + $response = + $bucket->get_key_filename( $src_file, GET, + $dest_file ) + or die $s3->err . ": " . 
$s3->errstr; + } +} + +# Compare the source file with destination file and verify checksum to ensure +# the files are not corrupted +sub compare_cksum +{ + my ($fsize, $parts)=@_; + my $md5 = Digest::MD5->new; + my $flag = 0; + foreach my $i (1..$parts){ + my $src_file = "/tmp/"."$fsize".".$i"; + my $dest_file = "/tmp/downloadfile".".$i"; + open( FILE, $src_file ) + or die "Error: Could not open $src_file for MD5 checksum..."; + open( DLFILE, $dest_file ) + or die "Error: Could not open $dest_file for MD5 checksum."; + binmode(FILE); + binmode(DLFILE); + my $md5sum = $md5->addfile(*FILE)->hexdigest; + my $md5sumdl = $md5->addfile(*DLFILE)->hexdigest; + close FILE; + close DLFILE; + # compare the checksums + if ( $md5sum eq $md5sumdl ) { + $flag++; + } + } + if ($flag == $parts){ + pass("checksum verification for multipart upload passed" ); + }else{ + fail("checksum verification for multipart upload failed" ); + } +} + +#== Main starts here=== +ceph_os_info(); +check(); +# The following test runs multi part upload of file size 1Gb in 10 parts +multipart_upload('1048576000', 10); +# The following test runs multipart upload of 1 Gb file in 100 parts +multipart_upload('1048576000', 100); +print "OK"; diff --git a/qa/workunits/rgw/s3_user_quota.pl b/qa/workunits/rgw/s3_user_quota.pl new file mode 100755 index 000000000..6d5c02a9a --- /dev/null +++ b/qa/workunits/rgw/s3_user_quota.pl @@ -0,0 +1,191 @@ +#! /usr/bin/perl + +=head1 NAME + +s3_user_quota.pl - Script to test the rgw user quota functionality using s3 interface. + +=head1 SYNOPSIS + +Use: + perl s3_user_quota.pl [--help] + +Examples: + perl s3_user_quota.pl + or + perl s3_user_quota.pl --help + +=head1 DESCRIPTION + +This script intends to test the rgw user quota funcionality using s3 interface +and reports the test results + +=head1 ARGUMENTS + +s3_user_quota.pl takes the following arguments: + --help + (optional) Displays the usage message. 
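compare_cksum() above validates each downloaded part by comparing MD5 digests of the source file and the fetched copy. The equivalent check in Python, using only the standard library; the file paths follow the /tmp naming used by the Perl helpers and are illustrative:

    #!/usr/bin/env python3
    # MD5 comparison sketch mirroring compare_cksum() in s3_multipart_upload.pl.
    import hashlib

    def md5sum(path, chunk=1024 * 1024):
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for block in iter(lambda: f.read(chunk), b''):
                h.update(block)
        return h.hexdigest()

    src = '/tmp/10Mb.1'           # part that was uploaded
    dst = '/tmp/downloadfile.1'   # same part fetched back from the bucket
    assert md5sum(src) == md5sum(dst), 'checksum mismatch after multipart download'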
+ +=cut + +use Amazon::S3; +use Data::Dumper; +use IO::File; +use Getopt::Long; +use Digest::MD5; +use Pod::Usage(); +use FindBin; +use lib $FindBin::Bin; +use s3_utilities; +use Net::Domain qw(hostfqdn); + +my $help; + +Getopt::Long::GetOptions( + 'help' => \$help +); +Pod::Usage::pod2usage(-verbose => 1) && exit if ($help); + +#== local variables === +our $mytestfilename; +my $mytestfilename1; +my $logmsg; +my $kruft; +my $s3; +my $hostdom = $ENV{RGW_FQDN}||hostfqdn(); +my $port = $ENV{RGW_PORT}||80; +our $hostname = "$hostdom:$port"; +our $testfileloc; +our $cnt; + +sub quota_set_max_size_per_user { + my ($maxsize, $size1,$rgw_user) = @_; + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=user --max-size=$maxsize`; + if (($set_quota !~ /./)&&($maxsize == 0)){ + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 1){ + pass("quota set for user: $rgw_user with max_size=$maxsize passed" ); + }else { + fail("quota set for user: $rgw_user with max_size=$maxsize failed" ); + } + } elsif (($set_quota !~ /./) && ($maxsize != 0)) { + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 0){ + pass("quota set for user: $rgw_user with max_size=$maxsize passed" ); + }else { + fail("quota set for user: $rgw_user with max_size=$maxsize failed" ); + } + } + delete_keys($mytestfilename); + purge_data($rgw_user); + return 0; +} + +sub max_size_per_user { + my ($maxsize, $size1,$rgw_user) = @_; + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=user --max-size=$maxsize`; + if (($set_quota !~ /./) && ($maxsize != 0)) { + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 0){ + $cnt++; + } + } + return $cnt; +} + +sub quota_set_max_obj_per_user { + # set max objects + my ($maxobjs, $size1, $rgw_user) = @_; + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=user --max-objects=$maxobjs`; + if (($set_quota !~ /./) && ($maxobjs == 0)){ + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 1){ + pass("quota set for user: $rgw_user with max_objects=$maxobjs passed" ); + }else { + fail("quota set for user: $rgw_user with max_objects=$maxobjs failed" ); + } + } elsif (($set_quota !~ /./) && ($maxobjs == 1)) { + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 0){ + pass("quota set for user: $rgw_user with max_objects=$maxobjs passed" ); + }else { + fail("quota set for user: $rgw_user with max_objects=$maxobjs failed" ); + } + } + delete_keys($mytestfilename); + purge_data($rgw_user); +} + +sub quota_enable_user { + my ($rgw_user) = @_; + my $en_quota = `$radosgw_admin quota enable --uid=$rgw_user --quota-scope=user`; + if ($en_quota !~ /./){ + print "quota enabled for the user $rgw_user \n"; + } else { + print "quota enable failed for the user $rgw_user \n"; + exit 1; + } + return 0; +} + +sub quota_disable_user { + my $dis_quota = `$radosgw_admin quota disable --uid=$rgw_user --quota-scope=user`; + if ($dis_quota !~ /./){ + print "quota disabled for the user $rgw_user \n"; + } else { + print "quota disable failed for the user $rgw_user \n"; + exit 1; + } + return 0; +} + +# upload a file to the bucket +sub upload_file { + print "adding file to bucket $bucketname: $mytestfilename\n"; + ($bucket->add_key_filename( $mytestfilename, $testfileloc, + { content_type => 'text/plain', }, + ) and (print "upload file successful\n" ) and return 0 ) or (return 1); +} + +# delete the bucket +sub delete_bucket { + ($bucket->delete_bucket) and (print "bucket delete 
succeeded \n") or die $s3->err . "delete bucket failed\n" . $s3->errstr; +} + +#Function to upload the given file size to bucket and verify +sub test_max_objs { + my ($size, $rgw_user) = @_; + create_file($size); + quota_enable_user($rgw_user); + my $ret_value = upload_file(); + return $ret_value; +} + +# set user quota and ensure it is validated +sub test_user_quota_max_size{ + my ($max_buckets,$size, $fsize) = @_; + my $usr = rand(); + foreach my $i (1..$max_buckets){ + my $ret_value = max_size_per_user($size, $fsize, $usr ); + } + if ($ret_value == $max_buckets){ + fail( "user quota max size for $usr failed on $max_buckets buckets" ); + } else { + pass( "user quota max size for $usr passed on $max_buckets buckets" ); + } + delete_keys($mytestfilename); + purge_data($usr); +} + +#== Main starts here=== +ceph_os_info(); +check(); +quota_set_max_obj_per_user('0', '10Mb', 'usr1'); +quota_set_max_obj_per_user('1', '10Mb', 'usr2'); +quota_set_max_size_per_user(0, '10Mb', 'usr1'); +quota_set_max_size_per_user(1048576000, '1Gb', 'usr2'); +test_user_quota_max_size(3,1048576000,'100Mb'); +test_user_quota_max_size(2,1048576000, '1Gb'); +print "OK"; diff --git a/qa/workunits/rgw/s3_utilities.pm b/qa/workunits/rgw/s3_utilities.pm new file mode 100644 index 000000000..3c3fae900 --- /dev/null +++ b/qa/workunits/rgw/s3_utilities.pm @@ -0,0 +1,233 @@ +# Common subroutines shared by the s3 testing code +my $sec; +my $min; +my $hour; +my $mon; +my $year; +my $mday; +my $wday; +my $yday; +my $isdst; +my $PASS_CNT = 0; +my $FAIL_CNT = 0; + +our $radosgw_admin = $ENV{RGW_ADMIN}||"sudo radosgw-admin"; + +# function to get the current time stamp from the test set up +sub get_timestamp { + ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); + if ($mon < 10) { $mon = "0$mon"; } + if ($hour < 10) { $hour = "0$hour"; } + if ($min < 10) { $min = "0$min"; } + if ($sec < 10) { $sec = "0$sec"; } + $year=$year+1900; + return $year . '_' . $mon . '_' . $mday . '_' . $hour . '_' . $min . '_' . $sec; +} + +# Function to check if radosgw is already running +sub get_status { + my $service = "radosgw"; + my $cmd = "pgrep $service"; + my $status = get_cmd_op($cmd); + if ($status =~ /\d+/ ){ + return 0; + } + return 1; +} + +# function to execute the command and return output +sub get_cmd_op +{ + my $cmd = shift; + my $excmd = `$cmd`; + return $excmd; +} + +#Function that executes the CLI commands and returns the output of the command +sub get_command_output { + my $cmd_output = shift; + open( FH, ">>$test_log" ); + print FH "\"$cmd_output\"\n"; + my $exec_cmd = `$cmd_output 2>&1`; + print FH "$exec_cmd\n"; + close(FH); + return $exec_cmd; +} + +# Function to get the hostname +sub get_hostname +{ + my $cmd = "hostname"; + my $get_host = get_command_output($cmd); + chomp($get_host); + return($get_host); +} + +sub pass { + my ($comment) = @_; + print "Comment required." unless length $comment; + chomp $comment; + print_border2(); + print "Test case: $TC_CNT PASSED - $comment \n"; + print_border2(); + $PASS_CNT++; +} + +sub fail { + my ($comment) = @_; + print "Comment required." unless length $comment; + chomp $comment; + print_border2(); + print "Test case: $TC_CNT FAILED - $comment \n"; + print_border2(); + $FAIL_CNT++; +} + +sub print_border2 { + print "~" x 90 . 
"\n"; +} + +# Function to create the user "qa_user" and extract the user access_key and secret_key of the user +sub get_user_info +{ + my ($rgw_user) = @_; + my $cmd = "$radosgw_admin user create --uid=$rgw_user --display-name=$rgw_user"; + my $cmd_op = get_command_output($cmd); + if ($cmd_op !~ /keys/){ + return (0,0); + } + my @get_user = (split/\n/,$cmd_op); + foreach (@get_user) { + if ($_ =~ /access_key/ ){ + $get_acc_key = $_; + } elsif ($_ =~ /secret_key/ ){ + $get_sec_key = $_; + } + } + my $access_key = $get_acc_key; + my $acc_key = (split /:/, $access_key)[1]; + $acc_key =~ s/\\//g; + $acc_key =~ s/ //g; + $acc_key =~ s/"//g; + $acc_key =~ s/,//g; + my $secret_key = $get_sec_key; + my $sec_key = (split /:/, $secret_key)[1]; + $sec_key =~ s/\\//g; + $sec_key =~ s/ //g; + $sec_key =~ s/"//g; + $sec_key =~ s/,//g; + return ($acc_key, $sec_key); +} + +# Function that deletes the given user and all associated user data +sub purge_data +{ + my ($rgw_user) = @_; + my $cmd = "$radosgw_admin user rm --uid=$rgw_user --purge-data"; + my $cmd_op = get_command_output($cmd); + if ($cmd_op !~ /./){ + print "user $rgw_user deleted\n"; + } else { + print "user $rgw_user NOT deleted\n"; + return 1; + } + return 0; +} + +# Read PRETTY_NAME from /etc/os-release +sub os_pretty_name +{ + open(FH, '<', '/etc/os-release') or die $!; + while (my $line = <FH>) { + chomp $line; + if ($line =~ /^\s*PRETTY_NAME=\"?([^"]*)\"?/) { + return $1; + } + } + close(FH); +} + + +# Function to get the Ceph and distro info +sub ceph_os_info +{ + my $ceph_v = get_command_output ( "ceph -v" ); + my @ceph_arr = split(" ",$ceph_v); + $ceph_v = "Ceph Version: $ceph_arr[2]"; + my $os_distro = os_pretty_name(); + $os_distro = "Linux Flavor:$os_distro"; + return ($ceph_v, $os_distro); +} + +# Execute the test case based on the input to the script +sub create_file { + my ($file_size, $part) = @_; + my $cnt; + $mytestfilename = "$file_size.$part"; + $testfileloc = "/tmp/".$mytestfilename; + if ($file_size == '10Mb'){ + $cnt = 1; + } elsif ($file_size == '100Mb'){ + $cnt = 10; + } elsif ($file_size == '500Mb'){ + $cnt = 50; + } elsif ($file_size == '1Gb'){ + $cnt = 100; + } elsif ($file_size == '2Gb'){ + $cnt = 200; + } + my $ret = system("dd if=/dev/zero of=$testfileloc bs=10485760 count=$cnt"); + if ($ret) { exit 1 }; + return 0; +} + +sub run_s3 +{ +# Run tests for the S3 functionality + # Modify access key and secret key to suit the user account + my ($user) = @_; + our ( $access_key, $secret_key ) = get_user_info($user); + if ( ($access_key) && ($secret_key) ) { + $s3 = Amazon::S3->new( + { + aws_access_key_id => $access_key, + aws_secret_access_key => $secret_key, + host => $hostname, + secure => 0, + retry => 1, + } + ); + } + +our $bucketname = 'buck_'.get_timestamp(); +# create a new bucket (the test bucket) +our $bucket = $s3->add_bucket( { bucket => $bucketname } ) + or die $s3->err. "bucket $bucketname create failed\n". 
$s3->errstr; + print "Bucket Created: $bucketname \n"; + return 0; +} + +# delete keys +sub delete_keys { + (($bucket->delete_key($_[0])) and return 0) or return 1; +} + +# Read the file back to bucket +sub readd_file { + system("dd if=/dev/zero of=/tmp/10MBfile1 bs=10485760 count=1"); + $mytestfilename1 = '10MBfile1'; + print "readding file to bucket: $mytestfilename1\n"; + ((($bucket->add_key_filename( $mytestfilename1, $testfileloc, + { content_type => 'text/plain', }, + )) and (print "readding file success\n") and return 0) or (return 1)); +} + +# check if rgw service is already running +sub check +{ + my $state = get_status(); + if ($state) { + exit 1; + } +} +1 diff --git a/qa/workunits/rgw/test-keystone-service-token.py b/qa/workunits/rgw/test-keystone-service-token.py new file mode 100755 index 000000000..2c7f21e93 --- /dev/null +++ b/qa/workunits/rgw/test-keystone-service-token.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2022 Binero +# +# Author: Tobias Urdin <tobias.urdin@binero.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. + +import sys +import requests +import time + + +# b4221c214dd64ee6a464g2153fae3813 is ID of deadbeef project +SWIFT_URL = 'http://localhost:8000/swift/v1/AUTH_b4221c214dd64ee6a464g2153fae3813' +KEYSTONE_URL = 'http://localhost:5000' + + +def get_stats(): + stats_url = '%s/stats' % KEYSTONE_URL + return requests.get(stats_url) + + +def test_list_containers(): + # Loop five list container requests with same token + for i in range(0, 5): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + # Verify admin token was cached + if stats['post_total'] != 1: + print('FAILED, post_total stat is %d not 1' % stats['post_total']) + sys.exit(1) + + # Verify user token was cached + if stats['get_total'] != 1: + print('FAILED, get_total stat is %d not 1' % stats['get_total']) + sys.exit(1) + + print('Wait for cache to be invalid') + time.sleep(11) + + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + if stats['post_total'] != 2: + print('FAILED, post_total stat is %d not 2' % stats['post_total']) + sys.exit(1) + + if stats['get_total'] != 2: + print('FAILED, get_total stat is %d not 2' % stats['get_total']) + sys.exit(1) + + +def test_expired_token(): + # Try listing containers with an expired token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2'}) + if r.status_code != 401: + print('FAILED, status code is %d not 401' % r.status_code) + sys.exit(1) + + # Get stats from 
fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + # Verify admin token was cached + if stats['post_total'] != 2: + print('FAILED, post_total stat is %d not 2' % stats['post_total']) + sys.exit(1) + + # Verify we got to fake Keystone server since expired tokens is not cached + if stats['get_total'] != 5: + print('FAILED, get_total stat is %d not 5' % stats['get_total']) + sys.exit(1) + + +def test_expired_token_with_service_token(): + # Try listing containers with an expired token but with a service token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'admin-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + # Verify admin token was cached + if stats['post_total'] != 2: + print('FAILED, post_total stat is %d not 2' % stats['post_total']) + sys.exit(1) + + # Verify we got to fake Keystone server since expired tokens is not cached + if stats['get_total'] != 7: + print('FAILED, get_total stat is %d not 7' % stats['get_total']) + sys.exit(1) + + print('Wait for cache to be invalid') + time.sleep(11) + + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'admin-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + if stats['post_total'] != 3: + print('FAILED, post_total stat is %d not 3' % stats['post_total']) + sys.exit(1) + + if stats['get_total'] != 9: + print('FAILED, get_total stat is %d not 9' % stats['get_total']) + sys.exit(1) + + +def test_expired_token_with_invalid_service_token(): + print('Wait for cache to be invalid') + time.sleep(11) + + # Test with a token that doesn't have allowed role as service token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'user-token-1'}) + if r.status_code != 401: + print('FAILED, status code is %d not 401' % r.status_code) + sys.exit(1) + + # Make sure we get user-token-1 cached + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Test that a cached token (that is invalid as service token) cannot be used as service token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'user-token-1'}) + if r.status_code != 401: + print('FAILED, status code is %d not 401' % r.status_code) + sys.exit(1) + + +def main(): + test_list_containers() + test_expired_token() + test_expired_token_with_service_token() + test_expired_token_with_invalid_service_token() + + +if __name__ == '__main__': + main() diff --git a/qa/workunits/rgw/test_librgw_file.sh b/qa/workunits/rgw/test_librgw_file.sh new file mode 100755 index 000000000..1371ff711 --- /dev/null +++ b/qa/workunits/rgw/test_librgw_file.sh @@ -0,0 +1,59 @@ +#!/bin/sh -e + + +if [ -z ${AWS_ACCESS_KEY_ID} ] +then + export AWS_ACCESS_KEY_ID=`openssl rand -base64 20` + export 
AWS_SECRET_ACCESS_KEY=`openssl rand -base64 40` + + radosgw-admin user create --uid ceph-test-librgw-file \ + --access-key $AWS_ACCESS_KEY_ID \ + --secret $AWS_SECRET_ACCESS_KEY \ + --display-name "librgw test user" \ + --email librgw@example.com || echo "librgw user exists" + + # keyring override for teuthology env + KEYRING="/etc/ceph/ceph.keyring" + K="-k ${KEYRING}" +fi + +# nfsns is the main suite + +# create herarchy, and then list it +echo "phase 1.1" +ceph_test_librgw_file_nfsns ${K} --hier1 --dirs1 --create --rename --verbose + +# the older librgw_file can consume the namespace +echo "phase 1.2" +ceph_test_librgw_file_nfsns ${K} --getattr --verbose + +# and delete the hierarchy +echo "phase 1.3" +ceph_test_librgw_file_nfsns ${K} --hier1 --dirs1 --delete --verbose + +# bulk create/delete buckets +echo "phase 2.1" +ceph_test_librgw_file_cd ${K} --create --multi --verbose +echo "phase 2.2" +ceph_test_librgw_file_cd ${K} --delete --multi --verbose + +# write continuation test +echo "phase 3.1" +ceph_test_librgw_file_aw ${K} --create --large --verify +echo "phase 3.2" +ceph_test_librgw_file_aw ${K} --delete --large + +# continued readdir +echo "phase 4.1" +ceph_test_librgw_file_marker ${K} --create --marker1 --marker2 --nobjs=100 --verbose +echo "phase 4.2" +ceph_test_librgw_file_marker ${K} --delete --verbose + +# advanced i/o--but skip readv/writev for now--split delete from +# create and stat ops to avoid fault in sysobject cache +echo "phase 5.1" +ceph_test_librgw_file_gp ${K} --get --stat --put --create +echo "phase 5.2" +ceph_test_librgw_file_gp ${K} --delete + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_bucket_check.py b/qa/workunits/rgw/test_rgw_bucket_check.py new file mode 100755 index 000000000..bfa6d65d6 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_bucket_check.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 + +import logging as log +import json +import botocore +from common import exec_cmd, create_user, boto_connect, put_objects, create_unlinked_objects +from botocore.config import Config + +""" +Tests behavior of radosgw-admin bucket check commands. +""" +# The test cases in this file have been annotated for inventory. 
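test_rgw_bucket_check.py (started above) manufactures its broken bucket-index state with create_unlinked_objects() from common.py, which temporarily injects OLH errors through ceph config around each PUT. A condensed sketch of that setup and the check it enables, assuming the common.py helpers, a running vstart-style cluster with the ceph and radosgw-admin CLIs available, and the check-tester credentials defined just below:

    #!/usr/bin/env python3
    # Sketch only: reproduces the unlinked-entry setup used by the bucket
    # check tests, relying on helpers defined in common.py above.
    import json
    from common import boto_connect, create_unlinked_objects, create_user, exec_cmd, put_objects

    create_user('check-tester', 'Check Testing',
                'OJODXSLNX4LUNHQG99PA', '3l6ffld34qaymfomuh832j94738aie2x4p2o8h6n')
    conn = boto_connect('OJODXSLNX4LUNHQG99PA',
                        '3l6ffld34qaymfomuh832j94738aie2x4p2o8h6n')
    bucket = conn.create_bucket(Bucket='check-bucket')
    conn.BucketVersioning('check-bucket').enable()
    put_objects(bucket, ['c', 'd'])

    # sets rgw_debug_inject_set_olh_err / rgw_debug_inject_olh_cancel_modification_err,
    # PUTs each key, then removes the injected config again
    unlinked = create_unlinked_objects(conn, bucket, ['c', 'd', 'e'])

    # the entries are now invisible to listings but still present in the index
    out = exec_cmd('radosgw-admin bucket check unlinked --bucket check-bucket '
                   '--min-age-hours 0 --dump-keys')
    assert len(json.loads(out)) == len(unlinked)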
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# + +""" Constants """ +USER = 'check-tester' +DISPLAY_NAME = 'Check Testing' +ACCESS_KEY = 'OJODXSLNX4LUNHQG99PA' +SECRET_KEY = '3l6ffld34qaymfomuh832j94738aie2x4p2o8h6n' +BUCKET_NAME = 'check-bucket' + +def main(): + """ + execute bucket check commands + """ + create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY) + + connection = boto_connect(ACCESS_KEY, SECRET_KEY, Config(retries = { + 'total_max_attempts': 1, + })) + + # pre-test cleanup + try: + bucket = connection.Bucket(BUCKET_NAME) + bucket.objects.all().delete() + bucket.object_versions.all().delete() + bucket.delete() + except botocore.exceptions.ClientError as e: + if not e.response['Error']['Code'] == 'NoSuchBucket': + raise + + bucket = connection.create_bucket(Bucket=BUCKET_NAME) + + null_version_keys = ['a', 'z'] + null_version_objs = put_objects(bucket, null_version_keys) + + connection.BucketVersioning(BUCKET_NAME).enable() + + ok_keys = ['a', 'b', 'c', 'd'] + unlinked_keys = ['c', 'd', 'e', 'f'] + ok_objs = put_objects(bucket, ok_keys) + + # TESTCASE 'recalculated bucket check stats are correct' + log.debug('TEST: recalculated bucket check stats are correct\n') + exec_cmd(f'radosgw-admin bucket check --fix --bucket {BUCKET_NAME}') + out = exec_cmd(f'radosgw-admin bucket stats --bucket {BUCKET_NAME}') + json_out = json.loads(out) + log.debug(json_out['usage']) + assert json_out['usage']['rgw.main']['num_objects'] == 6 + + # TESTCASE 'bucket check unlinked does not report normal entries' + log.debug('TEST: bucket check unlinked does not report normal entries\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == 0 + + unlinked_objs = create_unlinked_objects(connection, bucket, unlinked_keys) + + # TESTCASE 'bucket check unlinked finds unlistable entries' + log.debug('TEST: bucket check unlinked finds unlistable entries\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(unlinked_keys) + + # TESTCASE 'unlinked entries are not listable' + log.debug('TEST: unlinked entries are not listable\n') + for ov in bucket.object_versions.all(): + assert (ov.key, ov.version_id) not in unlinked_objs, f'object "{ov.key}:{ov.version_id}" was found in bucket listing' + + # TESTCASE 'GET returns 404 for unlinked entry keys that have no other versions' + log.debug('TEST: GET returns 404 for unlinked entry keys that have no other versions\n') + noent_keys = set(unlinked_keys) - set(ok_keys) + for key in noent_keys: + try: + bucket.Object(key).get() + assert False, 'GET did not return 404 for key={key} with no prior successful PUT' + except botocore.exceptions.ClientError as e: + assert e.response['ResponseMetadata']['HTTPStatusCode'] == 404 + + # TESTCASE 'bucket check unlinked fixes unlistable entries' + log.debug('TEST: bucket check unlinked fixes unlistable entries\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(unlinked_keys) + for o in unlinked_objs: + try: + connection.ObjectVersion(bucket.name, o[0], o[1]).head() + assert False, f'head for unlistable object {o[0]}:{o[1]} succeeded after fix' + except 
botocore.exceptions.ClientError as e: + assert e.response['ResponseMetadata']['HTTPStatusCode'] == 404 + + # TESTCASE 'bucket check unlinked fix does not affect normal entries' + log.debug('TEST: bucket check unlinked does not affect normal entries\n') + all_listable = list(bucket.object_versions.all()) + assert len(all_listable) == len(ok_keys) + len(null_version_keys), 'some normal objects were not accounted for in object listing after unlinked fix' + for o in ok_objs: + assert o in map(lambda x: (x.key, x.version_id), all_listable), "normal object not listable after fix" + connection.ObjectVersion(bucket.name, o[0], o[1]).head() + + # TESTCASE 'bucket check unlinked does not find new unlistable entries after fix' + log.debug('TEST: bucket check unlinked does not find new unlistable entries after fix\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == 0 + + # for this set of keys we can produce leftover OLH object/entries by + # deleting the normal object instance since we should already have a leftover + # pending xattr on the OLH object due to the errors associated with the + # prior unlinked entries that were created for the same keys + leftover_pending_xattr_keys = set(ok_keys).intersection(unlinked_keys) + objs_to_delete = filter(lambda x: x[0] in leftover_pending_xattr_keys, ok_objs) + + for o in objs_to_delete: + connection.ObjectVersion(bucket.name, o[0], o[1]).delete() + + for key in leftover_pending_xattr_keys: + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME} --object {key}') + idx_entries = json.loads(out.replace(b'\x80', b'0x80')) + assert len(idx_entries) > 0, 'failed to create leftover OLH entries for key {key}' + + # TESTCASE 'bucket check olh finds leftover OLH entries' + log.debug('TEST: bucket check olh finds leftover OLH entries\n') + out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(leftover_pending_xattr_keys) + + # TESTCASE 'bucket check olh fixes leftover OLH entries' + log.debug('TEST: bucket check olh fixes leftover OLH entries\n') + out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --fix --rgw-olh-pending-timeout-sec 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(leftover_pending_xattr_keys) + + for key in leftover_pending_xattr_keys: + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME} --object {key}') + idx_entries = json.loads(out.replace(b'\x80', b'0x80')) + assert len(idx_entries) == 0, 'index entries still exist for key={key} after olh fix' + + # TESTCASE 'bucket check olh does not find new leftover OLH entries after fix' + log.debug('TEST: bucket check olh does not find new leftover OLH entries after fix\n') + out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --dump-keys') + json_out = json.loads(out) + assert len(json_out) == 0 + + # TESTCASE 'bucket check fixes do not affect null version objects' + log.debug('TEST: verify that bucket check fixes do not affect null version objects\n') + for o in null_version_objs: + connection.ObjectVersion(bucket.name, o[0], 'null').head() + + all_versions = list(map(lambda x: (x.key, x.version_id), bucket.object_versions.all())) + for key in null_version_keys: + assert (key, 'null') in all_versions + + # TESTCASE 'bucket check stats are correct in the presence of unlinked entries' + log.debug('TEST: bucket check stats 
are correct in the presence of unlinked entries\n') + bucket.object_versions.all().delete() + null_version_objs = put_objects(bucket, null_version_keys) + ok_objs = put_objects(bucket, ok_keys) + unlinked_objs = create_unlinked_objects(connection, bucket, unlinked_keys) + exec_cmd(f'radosgw-admin bucket check --fix --bucket {BUCKET_NAME}') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(unlinked_keys) + bucket.object_versions.all().delete() + out = exec_cmd(f'radosgw-admin bucket stats --bucket {BUCKET_NAME}') + json_out = json.loads(out) + log.debug(json_out['usage']) + assert json_out['usage']['rgw.main']['size'] == 0 + assert json_out['usage']['rgw.main']['num_objects'] == 0 + assert json_out['usage']['rgw.main']['size_actual'] == 0 + assert json_out['usage']['rgw.main']['size_kb'] == 0 + assert json_out['usage']['rgw.main']['size_kb_actual'] == 0 + assert json_out['usage']['rgw.main']['size_kb_utilized'] == 0 + + # Clean up + log.debug("Deleting bucket {}".format(BUCKET_NAME)) + bucket.object_versions.all().delete() + bucket.delete() + +main() +log.info("Completed bucket check tests") diff --git a/qa/workunits/rgw/test_rgw_datacache.py b/qa/workunits/rgw/test_rgw_datacache.py new file mode 100755 index 000000000..f070ec0f1 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_datacache.py @@ -0,0 +1,209 @@ +#!/usr/bin/python3 + +import logging as log +from configobj import ConfigObj +import subprocess +import json +import os + +""" +Runs a test against a rgw with the data cache enabled. A client must be +set in the config for this task. This client must be the same client +that is in the config for the `rgw` task. + +In the `overrides` section `datacache` and `datacache` must be configured for +the `rgw` task and the ceph conf overrides must contain the below config +variables in the client section. + +`s3cmd` must be added as an extra_package to the install task. + +In the `workunit` task, `- rgw/run-datacache.sh` must be set for the client that +is in the config for the `rgw` task. The `RGW_DATACACHE_PATH` variable must be +set in the workunit's `env` and it must match the `datacache_path` given to the +`rgw` task in `overrides`. 
+Ex: +- install: + extra_packages: + deb: ['s3cmd'] + rpm: ['s3cmd'] +- overrides: + rgw: + datacache: true + datacache_path: /tmp/rgw_datacache + install: + extra_packages: + deb: ['s3cmd'] + rpm: ['s3cmd'] + ceph: + conf: + client: + rgw d3n l1 datacache persistent path: /tmp/rgw_datacache/ + rgw d3n l1 datacache size: 10737417240 + rgw d3n l1 local datacache enabled: true + rgw enable ops log: true +- rgw: + client.0: +- workunit: + clients: + client.0: + - rgw/run-datacache.sh + env: + RGW_DATACACHE_PATH: /tmp/rgw_datacache + cleanup: true +""" + +log.basicConfig(level=log.DEBUG) + +""" Constants """ +USER = 'rgw_datacache_user' +DISPLAY_NAME = 'DatacacheUser' +ACCESS_KEY = 'NX5QOQKC6BH2IDN8HC7A' +SECRET_KEY = 'LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn' +BUCKET_NAME = 'datacachebucket' +FILE_NAME = '7M.dat' +GET_FILE_NAME = '7M-get.dat' + +def exec_cmd(cmd): + log.debug("exec_cmd(%s)", cmd) + try: + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + out, err = proc.communicate() + if proc.returncode == 0: + log.info('command succeeded') + if out is not None: log.info(out) + return out + else: + raise Exception("error: %s \nreturncode: %s" % (err, proc.returncode)) + except Exception as e: + log.error('command failed') + log.error(e) + return False + +def get_radosgw_endpoint(): + out = exec_cmd('sudo netstat -nltp | egrep "rados|valgr"') # short for radosgw/valgrind + x = out.decode('utf8').split(" ") + port = [i for i in x if ':' in i][0].split(':')[1] + log.info('radosgw port: %s' % port) + proto = "http" + hostname = '127.0.0.1' + + if port == '443': + proto = "https" + + endpoint = hostname + + log.info("radosgw endpoint is: %s", endpoint) + return endpoint, proto + +def create_s3cmd_config(path, proto): + """ + Creates a minimal config file for s3cmd + """ + log.info("Creating s3cmd config...") + + use_https_config = "False" + log.info("proto for s3cmd config is %s", proto) + if proto == "https": + use_https_config = "True" + + s3cmd_config = ConfigObj( + indent_type='', + infile={ + 'default': + { + 'host_bucket': 'no.way.in.hell', + 'use_https': use_https_config, + }, + } + ) + + f = open(path, 'wb') + s3cmd_config.write(f) + f.close() + log.info("s3cmd config written") + +def get_cmd_output(cmd_out): + out = cmd_out.decode('utf8') + out = out.strip('\n') + return out + +def main(): + """ + execute the datacache test + """ + # setup for test + cache_dir = os.environ['RGW_DATACACHE_PATH'] + log.debug("datacache dir from config is: %s", cache_dir) + + out = exec_cmd('pwd') + pwd = get_cmd_output(out) + log.debug("pwd is: %s", pwd) + + endpoint, proto = get_radosgw_endpoint() + + # create 7M file to put + outfile = pwd + '/' + FILE_NAME + exec_cmd('dd if=/dev/urandom of=%s bs=1M count=7' % (outfile)) + + # create user + exec_cmd('radosgw-admin user create --uid %s --display-name %s --access-key %s --secret %s' + % (USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY)) + + # create s3cmd config + s3cmd_config_path = pwd + '/s3cfg' + create_s3cmd_config(s3cmd_config_path, proto) + + # create a bucket + exec_cmd('s3cmd --access_key=%s --secret_key=%s --config=%s --no-check-hostname --host=%s mb s3://%s' + % (ACCESS_KEY, SECRET_KEY, s3cmd_config_path, endpoint, BUCKET_NAME)) + + # put an object in the bucket + exec_cmd('s3cmd --access_key=%s --secret_key=%s --config=%s --no-check-hostname --host=%s put %s s3://%s' + % (ACCESS_KEY, SECRET_KEY, s3cmd_config_path, endpoint, outfile, BUCKET_NAME)) + + # get object from bucket + get_file_path = pwd + '/' + 
GET_FILE_NAME + exec_cmd('s3cmd --access_key=%s --secret_key=%s --config=%s --no-check-hostname --host=%s get s3://%s/%s %s --force' + % (ACCESS_KEY, SECRET_KEY, s3cmd_config_path, endpoint, BUCKET_NAME, FILE_NAME, get_file_path)) + + # get info of object + out = exec_cmd('radosgw-admin object stat --bucket=%s --object=%s' % (BUCKET_NAME, FILE_NAME)) + + json_op = json.loads(out) + cached_object_name = json_op['manifest']['prefix'] + log.debug("Cached object name is: %s", cached_object_name) + + # check that the cache is enabled (does the cache directory empty) + out = exec_cmd('find %s -type f | wc -l' % (cache_dir)) + chk_cache_dir = int(get_cmd_output(out)) + log.debug("Check cache dir content: %s", chk_cache_dir) + if chk_cache_dir == 0: + log.info("NOTICE: datacache test object not found, inspect if datacache was bypassed or disabled during this check.") + return + + # list the files in the cache dir for troubleshooting + out = exec_cmd('ls -l %s' % (cache_dir)) + # get name of cached object and check if it exists in the cache + out = exec_cmd('find %s -name "*%s1"' % (cache_dir, cached_object_name)) + cached_object_path = get_cmd_output(out) + log.debug("Path of file in datacache is: %s", cached_object_path) + out = exec_cmd('basename %s' % (cached_object_path)) + basename_cmd_out = get_cmd_output(out) + log.debug("Name of file in datacache is: %s", basename_cmd_out) + + # check to see if the cached object is in Ceph + out = exec_cmd('rados ls -p default.rgw.buckets.data') + rados_ls_out = get_cmd_output(out) + log.debug("rados ls output is: %s", rados_ls_out) + + assert(basename_cmd_out in rados_ls_out) + log.debug("RGW Datacache test SUCCESS") + + # remove datacache dir + #cmd = exec_cmd('rm -rf %s' % (cache_dir)) + #log.debug("RGW Datacache dir deleted") + #^ commenting for future refrence - the work unit will continue running tests and if the cache_dir is removed + # all the writes to cache will fail with errno 2 ENOENT No such file or directory. + +main() +log.info("Completed Datacache tests") diff --git a/qa/workunits/rgw/test_rgw_gc_log.sh b/qa/workunits/rgw/test_rgw_gc_log.sh new file mode 100755 index 000000000..ab4015aef --- /dev/null +++ b/qa/workunits/rgw/test_rgw_gc_log.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_rgw_gc_log + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_obj.sh b/qa/workunits/rgw/test_rgw_obj.sh new file mode 100755 index 000000000..01dd2b5ee --- /dev/null +++ b/qa/workunits/rgw/test_rgw_obj.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_rgw_obj + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_orphan_list.sh b/qa/workunits/rgw/test_rgw_orphan_list.sh new file mode 100755 index 000000000..34d550cea --- /dev/null +++ b/qa/workunits/rgw/test_rgw_orphan_list.sh @@ -0,0 +1,519 @@ +#!/usr/bin/env bash + +# set -x +set -e + +# if defined, debug messages will be displayed and prepended with the string +# debug="DEBUG" + +huge_size=5100 # in megabytes +big_size=7 # in megabytes + +huge_obj=/tmp/huge_obj.temp.$$ +big_obj=/tmp/big_obj.temp.$$ +empty_obj=/tmp/empty_obj.temp.$$ + +fifo=/tmp/orphan-fifo.$$ +awscli_dir=${HOME}/awscli_temp +export PATH=${PATH}:${awscli_dir} + +rgw_host=$(hostname --fqdn) +if echo "$rgw_host" | grep -q '\.' ; then + : +else + host_domain=".front.sepia.ceph.com" + echo "WARNING: rgw hostname -- $rgw_host -- does not appear to be fully qualified; PUNTING and appending $host_domain" + rgw_host="${rgw_host}${host_domain}" +fi +rgw_port=80 + +echo "Fully Qualified Domain Name: $rgw_host" + +success() { + echo OK. 
+ exit 0 +} + +######################################################################## +# INSTALL AND CONFIGURE TOOLING + +install_awscli() { + # NB: this does verify authenticity and integrity of downloaded + # file; see + # https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2-linux.html + here="$(pwd)" + cd "$HOME" + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" + unzip awscliv2.zip + mkdir -p $awscli_dir + ./aws/install -i $awscli_dir + cd "$here" +} + +uninstall_awscli() { + here="$(pwd)" + cd "$HOME" + rm -rf $awscli_dir ./aws awscliv2.zip + cd "$here" +} + +sudo yum -y install s3cmd +sudo yum -y install python3-setuptools +sudo yum -y install python3-pip +sudo pip3 install --upgrade setuptools +sudo pip3 install python-swiftclient + +# get ready for transition from s3cmd to awscli +if false ;then + install_awscli + aws --version + uninstall_awscli +fi + +s3config=/tmp/s3config.$$ + +# do not include the port when it is 80; the host base is used in the +# v4 signature and it needs to follow this convention for signatures +# to match +if [ "$rgw_port" -ne 80 ] ;then + s3_host_base="${rgw_host}:${rgw_port}" +else + s3_host_base="$rgw_host" +fi + +cat >${s3config} <<EOF +[default] +host_base = $s3_host_base +access_key = 0555b35654ad1656d804 +secret_key = h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q== +bucket_location = us-east-1 +check_ssl_certificate = True +check_ssl_hostname = True +default_mime_type = binary/octet-stream +delete_removed = False +dry_run = False +enable_multipart = True +encoding = UTF-8 +encrypt = False +follow_symlinks = False +force = False +guess_mime_type = True +host_bucket = anything.with.three.dots +multipart_chunk_size_mb = 15 +multipart_max_chunks = 10000 +recursive = False +recv_chunk = 65536 +send_chunk = 65536 +signature_v2 = False +socket_timeout = 300 +use_https = False +use_mime_magic = True +verbosity = WARNING +EOF + + +# set up swift authentication +export ST_AUTH=http://${rgw_host}:${rgw_port}/auth/v1.0 +export ST_USER=test:tester +export ST_KEY=testing + +create_users() { + # Create S3 user + local akey='0555b35654ad1656d804' + local skey='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==' + radosgw-admin user create --uid testid \ + --access-key $akey --secret $skey \ + --display-name 'M. Tester' --email tester@ceph.com + + # Create Swift user + radosgw-admin user create --subuser=test:tester \ + --display-name=Tester-Subuser --key-type=swift \ + --secret=testing --access=full +} + +myswift() { + if [ -n "$debug" ] ;then + echo "${debug}: swift --verbose --debug $@" + fi + swift --verbose --debug "$@" + local code=$? + if [ $code -ne 0 ] ;then + echo "ERROR: code = $code ; command = s3cmd --config=${s3config} --verbose --debug "$@"" + exit $code + fi +} + +mys3cmd() { + if [ -n "$debug" ] ;then + echo "${debug}: s3cmd --config=${s3config} --verbose --debug $@" + fi + s3cmd --config=${s3config} --verbose --debug "$@" + local code=$? 
+ if [ $code -ne 0 ] ;then + echo "ERROR: code = $code ; command = s3cmd --config=${s3config} --verbose --debug "$@"" + exit $code + fi +} + +mys3uploadkill() { + if [ $# -ne 5 ] ;then + echo "$0: error expecting 5 arguments" + exit 1 + fi + + local_file="$1" + remote_bkt="$2" + remote_obj="$3" + fifo="$4" + stop_part="$5" + + mkfifo $fifo + + s3cmd --config=${s3config} put $local_file \ + s3://${remote_bkt}/${remote_obj} \ + --progress \ + --multipart-chunk-size-mb=5 >$fifo & + set +e # don't allow errors to stop script + while read line ;do + echo "$line" | grep --quiet "part $stop_part " + if [ ${PIPESTATUS[1]} -eq 0 ] ;then + kill -9 $(jobs -p) + break + fi + done <$fifo + set -e + + rm -f $fifo +} + +mys3upload() { + obj=$1 + bucket=$2 + dest_obj=$3 + + mys3cmd put -q $obj s3://${bucket}/$dest_obj +} + +######################################################################## +# PREP + +create_users +dd if=/dev/urandom of=$big_obj bs=1M count=${big_size} +dd if=/dev/urandom of=$huge_obj bs=1M count=${huge_size} +touch $empty_obj + +quick_tests() { + echo TRY A SWIFT COMMAND + myswift upload swift-plain-ctr $big_obj --object-name swift-obj-test + myswift list + myswift list swift-plain-ctr + + echo TRY A RADOSGW-ADMIN COMMAND + radosgw-admin bucket list # make sure rgw is up and running +} + +######################################################################## +# S3 TESTS + +#################################### +# regular multipart test + +mys3cmd mb s3://multipart-bkt +mys3upload $huge_obj multipart-bkt multipart-obj +mys3cmd ls +mys3cmd ls s3://multipart-bkt + +#################################### +# multipart test with incomplete uploads + +bkt="incomplete-mp-bkt-1" + +mys3cmd mb s3://$bkt + +mys3uploadkill $huge_obj $bkt incomplete-mp-obj-c $fifo 20 + +# generate an incomplete multipart with more than 1,000 parts +mys3uploadkill $huge_obj $bkt incomplete-mp-obj-b $fifo 1005 + +# generate more than 1000 incomplet multiparts +for c in $(seq 1005) ;do + mys3uploadkill $huge_obj $bkt incomplete-mp-obj-c-$c $fifo 3 +done + +#################################### +# resharded bucket + +bkt=resharded-bkt-1 + +mys3cmd mb s3://$bkt + +for f in $(seq 8) ; do + dest_obj="reshard-obj-${f}" + mys3cmd put -q $big_obj s3://${bkt}/$dest_obj +done + +radosgw-admin bucket reshard --num-shards 3 --bucket=$bkt --yes-i-really-mean-it +radosgw-admin bucket reshard --num-shards 5 --bucket=$bkt --yes-i-really-mean-it + +#################################### +# versioned bucket + +if true ;then + echo "WARNING: versioned bucket test currently turned off" +else + bkt=versioned-bkt-1 + + mys3cmd mb s3://$bkt + + # bucket-enable-versioning $bkt + + for f in $(seq 3) ;do + for g in $(seq 10) ;do + dest_obj="versioned-obj-${g}" + mys3cmd put -q $big_obj s3://${bkt}/$dest_obj + done + done + + for g in $(seq 1 2 10) ;do + dest_obj="versioned-obj-${g}" + mys3cmd rm s3://${bkt}/$dest_obj + done +fi + +############################################################ +# copy small objects + +o_bkt="orig-bkt-1" +d_bkt="copy-bkt-1" +mys3cmd mb s3://$o_bkt + +for f in $(seq 4) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-obj-1 s3://${d_bkt}/copied-obj-1 +mys3cmd cp s3://${o_bkt}/orig-obj-3 s3://${d_bkt}/copied-obj-3 + +for f in $(seq 5 6) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${d_bkt}/$dest_obj +done + +############################################################ +# copy small objects and delete original 
+ +o_bkt="orig-bkt-2" +d_bkt="copy-bkt-2" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 4) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-obj-1 s3://${d_bkt}/copied-obj-1 +mys3cmd cp s3://${o_bkt}/orig-obj-3 s3://${d_bkt}/copied-obj-3 + +for f in $(seq 5 6) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://${o_bkt} + +############################################################ +# copy multipart objects + +o_bkt="orig-mp-bkt-3" +d_bkt="copy-mp-bkt-3" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + + +############################################################ +# copy multipart objects and delete original + +o_bkt="orig-mp-bkt-4" +d_bkt="copy-mp-bkt-4" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://$o_bkt + +######################################################################## +# SWIFT TESTS + +# 600MB +segment_size=629145600 + +############################################################ +# plain test + +for f in $(seq 4) ;do + myswift upload swift-plain-ctr $big_obj --object-name swift-obj-$f +done + +############################################################ +# zero-len test + +myswift upload swift-zerolen-ctr $empty_obj --object-name subdir/ +myswift upload swift-zerolen-ctr $big_obj --object-name subdir/abc1 +myswift upload swift-zerolen-ctr $empty_obj --object-name subdir/empty1 +myswift upload swift-zerolen-ctr $big_obj --object-name subdir/xyz1 + +############################################################ +# dlo test + +# upload in 300MB segments +myswift upload swift-dlo-ctr $huge_obj --object-name dlo-obj-1 \ + -S $segment_size + +############################################################ +# slo test + +# upload in 300MB segments +myswift upload swift-slo-ctr $huge_obj --object-name slo-obj-1 \ + -S $segment_size --use-slo + +############################################################ +# large object copy test + +# upload in 300MB segments +o_ctr=swift-orig-ctr +o_obj=slo-orig-obj-1 +d_ctr=swift-copy-ctr +d_obj=slo-copy-obj-1 +myswift upload $o_ctr $big_obj --object-name $o_obj + +myswift copy --destination /${d_ctr}/${d_obj} \ + $o_ctr $o_obj + +myswift delete $o_ctr $o_obj + +############################################################ +# huge dlo object copy test + +o_ctr=swift-orig-dlo-ctr-1 +o_obj=dlo-orig-dlo-obj-1 +d_ctr=swift-copy-dlo-ctr-1 +d_obj=dlo-copy-dlo-obj-1 + +myswift upload $o_ctr $huge_obj --object-name $o_obj \ + -S $segment_size + +myswift copy --destination /${d_ctr}/${d_obj} \ + $o_ctr $o_obj + +############################################################ +# huge dlo object copy and orig delete + +o_ctr=swift-orig-dlo-ctr-2 +o_obj=dlo-orig-dlo-obj-2 +d_ctr=swift-copy-dlo-ctr-2 +d_obj=dlo-copy-dlo-obj-2 + 
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size
+
+myswift copy --destination /${d_ctr}/${d_obj} \
+    $o_ctr $o_obj
+
+myswift delete $o_ctr $o_obj
+
+############################################################
+# huge slo object copy test
+
+o_ctr=swift-orig-slo-ctr-1
+o_obj=slo-orig-slo-obj-1
+d_ctr=swift-copy-slo-ctr-1
+d_obj=slo-copy-slo-obj-1
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size --use-slo
+
+myswift copy --destination /${d_ctr}/${d_obj} $o_ctr $o_obj
+
+############################################################
+# huge slo object copy test and orig delete
+
+o_ctr=swift-orig-slo-ctr-2
+o_obj=slo-orig-slo-obj-2
+d_ctr=swift-copy-slo-ctr-2
+d_obj=slo-copy-slo-obj-2
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size --use-slo
+
+myswift copy --destination /${d_ctr}/${d_obj} $o_ctr $o_obj
+
+myswift delete $o_ctr $o_obj
+
+########################################################################
+# FORCE GARBAGE COLLECTION
+
+sleep 6 # since for testing the age at which gc can happen is 5 secs
+radosgw-admin gc process --include-all
+
+
+########################################
+# DO ORPHAN LIST
+
+pool="default.rgw.buckets.data"
+
+rgw-orphan-list $pool
+
+# we only expect there to be one output file, but loop just in case
+ol_error=""
+for f in orphan-list-*.out ; do
+    if [ -s "$f" ] ;then # if file non-empty
+        ol_error="${ol_error}:$f"
+        echo "One or more orphans found in $f:"
+        cat "$f"
+    fi
+done
+
+if [ -n "$ol_error" ] ;then
+    echo "ERROR: orphans found when none expected"
+    exit 1
+fi
+
+########################################################################
+# CLEAN UP
+
+rm -f $empty_obj $big_obj $huge_obj $s3config
+
+success
diff --git a/qa/workunits/rgw/test_rgw_reshard.py b/qa/workunits/rgw/test_rgw_reshard.py
new file mode 100755
index 000000000..6326e7b17
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_reshard.py
@@ -0,0 +1,311 @@
+#!/usr/bin/python3
+
+import errno
+import time
+import logging as log
+import json
+import os
+from common import exec_cmd, boto_connect, create_user, put_objects, create_unlinked_objects
+
+"""
+RGW manual and dynamic resharding testing against a running instance
+"""
+# The test cases in this file have been annotated for inventory.
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# + +""" Constants """ +USER = 'tester' +DISPLAY_NAME = 'Testing' +ACCESS_KEY = 'NX5QOQKC6BH2IDN8HC7A' +SECRET_KEY = 'LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn' +BUCKET_NAME = 'a-bucket' +VER_BUCKET_NAME = 'myver' +INDEX_POOL = 'default.rgw.buckets.index' + +class BucketStats: + def __init__(self, bucket_name, bucket_id, num_objs=0, size_kb=0, num_shards=0): + self.bucket_name = bucket_name + self.bucket_id = bucket_id + self.num_objs = num_objs + self.size_kb = size_kb + self.num_shards = num_shards if num_shards > 0 else 1 + + def get_num_shards(self): + self.num_shards = get_bucket_num_shards(self.bucket_name, self.bucket_id) + + +def get_bucket_stats(bucket_name): + """ + function to get bucket stats + """ + cmd = exec_cmd("radosgw-admin bucket stats --bucket {}".format(bucket_name)) + json_op = json.loads(cmd) + #print(json.dumps(json_op, indent = 4, sort_keys=True)) + bucket_id = json_op['id'] + num_shards = json_op['num_shards'] + if len(json_op['usage']) > 0: + num_objects = json_op['usage']['rgw.main']['num_objects'] + size_kb = json_op['usage']['rgw.main']['size_kb'] + else: + num_objects = 0 + size_kb = 0 + log.debug(" \nBUCKET_STATS: \nbucket: {} id: {} num_objects: {} size_kb: {} num_shards: {}\n".format(bucket_name, bucket_id, + num_objects, size_kb, num_shards)) + return BucketStats(bucket_name, bucket_id, num_objects, size_kb, num_shards) + +def get_bucket_layout(bucket_name): + res = exec_cmd("radosgw-admin bucket layout --bucket {}".format(bucket_name)) + return json.loads(res) + +def get_bucket_shard0(bucket_name): + bucket_id = get_bucket_stats(bucket_name).bucket_id + index_gen = get_bucket_layout(bucket_name)['layout']['current_index']['gen'] + return '.dir.%s.%d.0' % (bucket_id, index_gen) + +def get_bucket_num_shards(bucket_name, bucket_id): + """ + function to get bucket num shards + """ + metadata = 'bucket.instance:' + bucket_name + ':' + bucket_id + cmd = exec_cmd('radosgw-admin metadata get {}'.format(metadata)) + json_op = json.loads(cmd) + num_shards = json_op['data']['bucket_info']['num_shards'] + return num_shards + +def run_bucket_reshard_cmd(bucket_name, num_shards, **kwargs): + cmd = 'radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(bucket_name, num_shards) + cmd += ' --rgw-reshard-bucket-lock-duration 30' # reduce to minimum + if 'error_at' in kwargs: + cmd += ' --inject-error-at {}'.format(kwargs.pop('error_at')) + elif 'abort_at' in kwargs: + cmd += ' --inject-abort-at {}'.format(kwargs.pop('abort_at')) + if 'error_code' in kwargs: + cmd += ' --inject-error-code {}'.format(kwargs.pop('error_code')) + return exec_cmd(cmd, **kwargs) + +def test_bucket_reshard(conn, name, **fault): + # create a bucket with non-default ACLs to verify that reshard preserves them + bucket = conn.create_bucket(Bucket=name, ACL='authenticated-read') + grants = bucket.Acl().grants + + objs = [] + try: + # create objs + for i in range(0, 20): + objs += [bucket.put_object(Key='key' + str(i), Body=b"some_data")] + + old_shard_count = get_bucket_stats(name).num_shards + num_shards_expected = old_shard_count + 1 + + # try reshard with fault injection + _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False, **fault) + + if fault.get('error_code') == errno.ECANCELED: + assert(ret == 0) # expect ECANCELED to retry and succeed + else: + assert(ret != 0 and ret != errno.EBUSY) + + # check shard count + cur_shard_count = 
get_bucket_stats(name).num_shards + assert(cur_shard_count == old_shard_count) + + # verify that the bucket is writeable by deleting an object + objs.pop().delete() + + assert grants == bucket.Acl().grants # recheck grants after cancel + + # retry reshard without fault injection. if radosgw-admin aborted, + # we'll have to retry until the reshard lock expires + while True: + _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False) + if ret == errno.EBUSY: + log.info('waiting 30 seconds for reshard lock to expire...') + time.sleep(30) + continue + assert(ret == 0) + break + + # recheck shard count + final_shard_count = get_bucket_stats(name).num_shards + assert(final_shard_count == num_shards_expected) + + assert grants == bucket.Acl().grants # recheck grants after commit + finally: + # cleanup on resharded bucket must succeed + bucket.delete_objects(Delete={'Objects':[{'Key':o.key} for o in objs]}) + bucket.delete() + + +def main(): + """ + execute manual and dynamic resharding commands + """ + create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY) + + connection = boto_connect(ACCESS_KEY, SECRET_KEY) + + # create a bucket + bucket = connection.create_bucket(Bucket=BUCKET_NAME) + ver_bucket = connection.create_bucket(Bucket=VER_BUCKET_NAME) + connection.BucketVersioning(VER_BUCKET_NAME).enable() + + bucket_acl = connection.BucketAcl(BUCKET_NAME).load() + ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).load() + + # TESTCASE 'reshard-add','reshard','add','add bucket to resharding queue','succeeds' + log.debug('TEST: reshard add\n') + + num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1 + cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected)) + cmd = exec_cmd('radosgw-admin reshard list') + json_op = json.loads(cmd) + log.debug('bucket name {}'.format(json_op[0]['bucket_name'])) + assert json_op[0]['bucket_name'] == BUCKET_NAME + assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + + # TESTCASE 'reshard-process','reshard','','process bucket resharding','succeeds' + log.debug('TEST: reshard process\n') + cmd = exec_cmd('radosgw-admin reshard process') + time.sleep(5) + # check bucket shards num + bucket_stats1 = get_bucket_stats(BUCKET_NAME) + if bucket_stats1.num_shards != num_shards_expected: + log.error("Resharding failed on bucket {}. Expected number of shards are not created\n".format(BUCKET_NAME)) + + # TESTCASE 'reshard-add','reshard','add','add non empty bucket to resharding queue','succeeds' + log.debug('TEST: reshard add non empty bucket\n') + # create objs + num_objs = 8 + for i in range(0, num_objs): + connection.Object(BUCKET_NAME, ('key'+str(i))).put(Body=b"some_data") + + num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1 + cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected)) + cmd = exec_cmd('radosgw-admin reshard list') + json_op = json.loads(cmd) + assert json_op[0]['bucket_name'] == BUCKET_NAME + assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + + # TESTCASE 'reshard process ,'reshard','process','reshard non empty bucket','succeeds' + log.debug('TEST: reshard process non empty bucket\n') + cmd = exec_cmd('radosgw-admin reshard process') + # check bucket shards num + bucket_stats1 = get_bucket_stats(BUCKET_NAME) + if bucket_stats1.num_shards != num_shards_expected: + log.error("Resharding failed on bucket {}. 
Expected number of shards are not created\n".format(BUCKET_NAME)) + + # TESTCASE 'manual bucket resharding','inject error','fail','check bucket accessibility', 'retry reshard' + log.debug('TEST: reshard bucket with EIO injected at set_target_layout\n') + test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout') + log.debug('TEST: reshard bucket with ECANCELED injected at set_target_layout\n') + test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout', error_code=errno.ECANCELED) + log.debug('TEST: reshard bucket with abort at set_target_layout\n') + test_bucket_reshard(connection, 'abort-at-set-target-layout', abort_at='set_target_layout') + + log.debug('TEST: reshard bucket with EIO injected at block_writes\n') + test_bucket_reshard(connection, 'error-at-block-writes', error_at='block_writes') + log.debug('TEST: reshard bucket with abort at block_writes\n') + test_bucket_reshard(connection, 'abort-at-block-writes', abort_at='block_writes') + + log.debug('TEST: reshard bucket with EIO injected at commit_target_layout\n') + test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout') + log.debug('TEST: reshard bucket with ECANCELED injected at commit_target_layout\n') + test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout', error_code=errno.ECANCELED) + log.debug('TEST: reshard bucket with abort at commit_target_layout\n') + test_bucket_reshard(connection, 'abort-at-commit-target-layout', abort_at='commit_target_layout') + + log.debug('TEST: reshard bucket with EIO injected at do_reshard\n') + test_bucket_reshard(connection, 'error-at-do-reshard', error_at='do_reshard') + log.debug('TEST: reshard bucket with abort at do_reshard\n') + test_bucket_reshard(connection, 'abort-at-do-reshard', abort_at='do_reshard') + + # TESTCASE 'versioning reshard-','bucket', reshard','versioning reshard','succeeds' + log.debug(' test: reshard versioned bucket') + num_shards_expected = get_bucket_stats(VER_BUCKET_NAME).num_shards + 1 + cmd = exec_cmd('radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(VER_BUCKET_NAME, + num_shards_expected)) + # check bucket shards num + ver_bucket_stats = get_bucket_stats(VER_BUCKET_NAME) + assert ver_bucket_stats.num_shards == num_shards_expected + + # TESTCASE 'check acl' + new_bucket_acl = connection.BucketAcl(BUCKET_NAME).load() + assert new_bucket_acl == bucket_acl + new_ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).load() + assert new_ver_bucket_acl == ver_bucket_acl + + # TESTCASE 'check reshard removes olh entries with empty name' + log.debug(' test: reshard removes olh entries with empty name') + bucket.objects.all().delete() + + + # get name of shard 0 object, add a bogus olh entry with empty name + bucket_shard0 = get_bucket_shard0(BUCKET_NAME) + if 'CEPH_ROOT' in os.environ: + k = '%s/qa/workunits/rgw/olh_noname_key' % os.environ['CEPH_ROOT'] + v = '%s/qa/workunits/rgw/olh_noname_val' % os.environ['CEPH_ROOT'] + else: + k = 'olh_noname_key' + v = 'olh_noname_val' + exec_cmd('rados -p %s setomapval %s --omap-key-file %s < %s' % (INDEX_POOL, bucket_shard0, k, v)) + + # check that bi list has one entry with empty name + cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME) + json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 + assert len(json_op) == 1 + assert json_op[0]['entry']['key']['name'] == '' + + # reshard to prune the bogus olh + cmd = 
exec_cmd('radosgw-admin bucket reshard --bucket %s --num-shards %s --yes-i-really-mean-it' % (BUCKET_NAME, 1))
+
+    # check that the bi list has zero entries
+    cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
+    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
+    assert len(json_op) == 0
+
+    # TESTCASE 'check that PUT succeeds during reshard'
+    log.debug(' test: PUT succeeds during reshard')
+    num_shards = get_bucket_stats(VER_BUCKET_NAME).num_shards
+    exec_cmd('''radosgw-admin --inject-delay-at=do_reshard --inject-delay-ms=5000 \
+              bucket reshard --bucket {} --num-shards {}'''
+             .format(VER_BUCKET_NAME, num_shards + 1), wait = False)
+    time.sleep(1)
+    ver_bucket.put_object(Key='put_during_reshard', Body=b"some_data")
+    log.debug('put object successful')
+
+    # TESTCASE 'check that bucket stats are correct after reshard with unlinked entries'
+    log.debug('TEST: check that bucket stats are correct after reshard with unlinked entries\n')
+    ver_bucket.object_versions.all().delete()
+    ok_keys = ['a', 'b', 'c']
+    unlinked_keys = ['x', 'y', 'z']
+    put_objects(ver_bucket, ok_keys)
+    create_unlinked_objects(connection, ver_bucket, unlinked_keys)
+    cmd = exec_cmd(f'radosgw-admin bucket reshard --bucket {VER_BUCKET_NAME} --num-shards 17 --yes-i-really-mean-it')
+    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {VER_BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys')
+    json_out = json.loads(out)
+    assert len(json_out) == len(unlinked_keys)
+    ver_bucket.object_versions.all().delete()
+    out = exec_cmd(f'radosgw-admin bucket stats --bucket {VER_BUCKET_NAME}')
+    json_out = json.loads(out)
+    log.debug(json_out['usage'])
+    assert json_out['usage']['rgw.main']['size'] == 0
+    assert json_out['usage']['rgw.main']['num_objects'] == 0
+    assert json_out['usage']['rgw.main']['size_actual'] == 0
+    assert json_out['usage']['rgw.main']['size_kb'] == 0
+    assert json_out['usage']['rgw.main']['size_kb_actual'] == 0
+    assert json_out['usage']['rgw.main']['size_kb_utilized'] == 0
+
+    # Clean up
+    log.debug("Deleting bucket {}".format(BUCKET_NAME))
+    bucket.objects.all().delete()
+    bucket.delete()
+    log.debug("Deleting bucket {}".format(VER_BUCKET_NAME))
+    ver_bucket.object_versions.all().delete()
+    ver_bucket.delete()
+
+main()
+log.info("Completed resharding tests")
diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.py b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py
new file mode 100755
index 000000000..b3cb2d5ab
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py
@@ -0,0 +1,121 @@
+import boto3
+import botocore.exceptions
+import sys
+import os
+import subprocess
+
+#boto3.set_stream_logger(name='botocore')
+
+# handles three optional system arguments:
+# <bucket-name> : default is "bkt314738362229"
+# <0 or 1> : 0 -> upload aborted, 1 -> completed; default is completed
+# <0 or 1> : 0 -> non-versioned bucket, 1 -> versioned; default is non-versioned
+
+if len(sys.argv) >= 2:
+    bucket_name = sys.argv[1]
+else:
+    bucket_name = "bkt314738362229"
+print("bucket name is %s" % bucket_name)
+
+complete_mpu = True
+if len(sys.argv) >= 3:
+    complete_mpu = int(sys.argv[2]) > 0
+
+versioned_bucket = False
+if len(sys.argv) >= 4:
+    versioned_bucket = int(sys.argv[3]) > 0
+
+rgw_host = os.environ['RGW_HOST']
+access_key = os.environ['RGW_ACCESS_KEY']
+secret_key = os.environ['RGW_SECRET_KEY']
+
+try:
+    endpoint='http://%s:%d' % (rgw_host, 80)
+    client = boto3.client('s3',
+                          endpoint_url=endpoint,
+                          aws_access_key_id=access_key,
+                          aws_secret_access_key=secret_key)
+    res = client.create_bucket(Bucket=bucket_name)
+except 
botocore.exceptions.EndpointConnectionError:
+    try:
+        endpoint='https://%s:%d' % (rgw_host, 443)
+        client = boto3.client('s3',
+                              endpoint_url=endpoint,
+                              verify=False,
+                              aws_access_key_id=access_key,
+                              aws_secret_access_key=secret_key)
+        res = client.create_bucket(Bucket=bucket_name)
+    except botocore.exceptions.EndpointConnectionError:
+        endpoint='http://%s:%d' % (rgw_host, 8000)
+        client = boto3.client('s3',
+                              endpoint_url=endpoint,
+                              aws_access_key_id=access_key,
+                              aws_secret_access_key=secret_key)
+        res = client.create_bucket(Bucket=bucket_name)
+
+print("endpoint is %s" % endpoint)
+
+if versioned_bucket:
+    res = client.put_bucket_versioning(
+        Bucket=bucket_name,
+        VersioningConfiguration={
+            'MFADelete': 'Disabled',
+            'Status': 'Enabled'}
+    )
+
+key = "mpu_test4"
+nparts = 2
+ndups = 11
+do_reupload = True
+
+part_path = "/tmp/mp_part_5m"
+subprocess.run(["dd", "if=/dev/urandom", "of=" + part_path, "bs=1M", "count=5"], check=True)
+
+f = open(part_path, 'rb')
+
+res = client.create_multipart_upload(Bucket=bucket_name, Key=key)
+mpu_id = res["UploadId"]
+
+print("start UploadId=%s" % (mpu_id))
+
+parts = []
+parts2 = []
+
+for ix in range(0,nparts):
+    part_num = ix + 1
+    f.seek(0)
+    res = client.upload_part(Body=f, Bucket=bucket_name, Key=key,
+                             UploadId=mpu_id, PartNumber=part_num)
+    # save
+    etag = res['ETag']
+    part = {'ETag': etag, 'PartNumber': part_num}
+    print("phase 1 uploaded part %s" % part)
+    parts.append(part)
+
+if do_reupload:
+    # just re-upload part 1
+    part_num = 1
+    for ix in range(0,ndups):
+        f.seek(0)
+        res = client.upload_part(Body=f, Bucket=bucket_name, Key=key,
+                                 UploadId=mpu_id, PartNumber=part_num)
+        etag = res['ETag']
+        part = {'ETag': etag, 'PartNumber': part_num}
+        print ("phase 2 uploaded part %s" % part)
+
+        # save
+        etag = res['ETag']
+        part = {'ETag': etag, 'PartNumber': part_num}
+        parts2.append(part)
+
+if complete_mpu:
+    print("completing multipart upload, parts=%s" % parts)
+    res = client.complete_multipart_upload(
+        Bucket=bucket_name, Key=key, UploadId=mpu_id,
+        MultipartUpload={'Parts': parts})
+else:
+    print("aborting multipart upload, parts=%s" % parts)
+    res = client.abort_multipart_upload(
+        Bucket=bucket_name, Key=key, UploadId=mpu_id)
+
+# clean up
+subprocess.run(["rm", "-f", part_path], check=True)
diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh
new file mode 100755
index 000000000..5d73fd048
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+
+# INITIALIZATION
+
+mydir=$(dirname $0)
+data_pool=default.rgw.buckets.data
+orphan_list_out=/tmp/orphan_list.$$
+radoslist_out=/tmp/radoslist.$$
+rados_ls_out=/tmp/rados_ls.$$
+diff_out=/tmp/diff.$$
+
+rgw_host="$(hostname --fqdn)"
+echo "INFO: fully qualified domain name: $rgw_host"
+
+export RGW_ACCESS_KEY="0555b35654ad1656d804"
+export RGW_SECRET_KEY="h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="
+export RGW_HOST="${RGW_HOST:-$rgw_host}"
+
+# random argument determines if multipart is aborted or completed 50/50
+outcome=$((RANDOM % 2))
+if [ $outcome -eq 0 ] ;then
+    echo "== TESTING *ABORTING* MULTIPART UPLOAD WITH RE-UPLOADS =="
+else
+    echo "== TESTING *COMPLETING* MULTIPART UPLOAD WITH RE-UPLOADS =="
+fi
+
+# random argument determines if the bucket is versioned or not 50/50
+versioning=$((RANDOM % 2))
+if [ $versioning -eq 0 ] ;then
+    echo "== TESTING NON-VERSIONED BUCKET =="
+else
+    echo "== TESTING VERSIONED BUCKET =="
+fi
+
+# create a randomized bucket name
+bucket="reupload-bkt-$((RANDOM % 899999 + 100000))" + + +# SET UP PYTHON VIRTUAL ENVIRONMENT + +# install boto3 +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + + +# CREATE RGW USER IF NECESSARY + +if radosgw-admin user info --access-key $RGW_ACCESS_KEY 2>/dev/null ;then + echo INFO: user already exists +else + echo INFO: creating user + radosgw-admin user create --uid testid \ + --access-key $RGW_ACCESS_KEY \ + --secret $RGW_SECRET_KEY \ + --display-name 'M. Tester' \ + --email tester@ceph.com 2>/dev/null +fi + + +# RUN REUPLOAD TEST + +$mydir/bin/python3 ${mydir}/test_rgw_s3_mp_reupload.py $bucket $outcome $versioning + + +# ANALYZE FOR ERRORS +# (NOTE: for now we're choosing not to use the rgw-orphan-list tool) + +# force garbage collection to remove extra parts +radosgw-admin gc process --include-all 2>/dev/null + +marker=$(radosgw-admin metadata get bucket:$bucket 2>/dev/null | grep bucket_id | sed 's/.*: "\(.*\)".*/\1/') + +# determine expected rados objects +radosgw-admin bucket radoslist --bucket=$bucket 2>/dev/null | sort >$radoslist_out +echo "radosgw-admin bucket radoslist:" +cat $radoslist_out + +# determine found rados objects +rados ls -p $data_pool 2>/dev/null | grep "^$marker" | sort >$rados_ls_out +echo "rados ls:" +cat $rados_ls_out + +# compare expected and found +diff $radoslist_out $rados_ls_out >$diff_out +if [ $(cat $diff_out | wc -l) -ne 0 ] ;then + error=1 + echo "ERROR: Found differences between expected and actual rados objects for test bucket." + echo " note: indicators: '>' found but not expected; '<' expected but not found." + cat $diff_out +fi + + +# CLEAN UP + +deactivate + +rm -f $orphan_list_out $radoslist_out $rados_ls_out $diff_out + + +# PRODUCE FINAL RESULTS + +if [ -n "$error" ] ;then + echo "== FAILED ==" + exit 1 +fi + +echo "== PASSED ==" +exit 0 diff --git a/qa/workunits/rgw/test_rgw_throttle.sh b/qa/workunits/rgw/test_rgw_throttle.sh new file mode 100755 index 000000000..f637b8f08 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_throttle.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_rgw_throttle + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_versioning.py b/qa/workunits/rgw/test_rgw_versioning.py new file mode 100755 index 000000000..fc69e138d --- /dev/null +++ b/qa/workunits/rgw/test_rgw_versioning.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import logging as log +import json +import uuid +import botocore +import time +from common import exec_cmd, create_user, boto_connect +from botocore.config import Config + +""" +Tests behavior of bucket versioning. +""" +# The test cases in this file have been annotated for inventory. 
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# + +""" Constants """ +USER = 'versioning-tester' +DISPLAY_NAME = 'Versioning Testing' +ACCESS_KEY = 'LTA662PVVDTDWX6M2AB0' +SECRET_KEY = 'pvtchqajgzqx5581t6qbddbkj0bgf3a69qdkjcea' +BUCKET_NAME = 'versioning-bucket' +DATA_POOL = 'default.rgw.buckets.data' + +def main(): + """ + execute versioning tests + """ + create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY) + + connection = boto_connect(ACCESS_KEY, SECRET_KEY, Config(retries = { + 'total_max_attempts': 1, + })) + + # pre-test cleanup + try: + bucket = connection.Bucket(BUCKET_NAME) + bucket.objects.all().delete() + bucket.object_versions.all().delete() + bucket.delete() + except botocore.exceptions.ClientError as e: + if not e.response['Error']['Code'] == 'NoSuchBucket': + raise + + bucket = connection.create_bucket(Bucket=BUCKET_NAME) + connection.BucketVersioning(BUCKET_NAME).enable() + + # reproducer for bug from https://tracker.ceph.com/issues/59663 + # TESTCASE 'verify that index entries and OLH objects are cleaned up after redundant deletes' + log.debug('TEST: verify that index entries and OLH objects are cleaned up after redundant deletes\n') + key = str(uuid.uuid4()) + resp = bucket.Object(key).delete() + assert 'DeleteMarker' in resp, 'DeleteMarker key not present in response' + assert resp['DeleteMarker'], 'DeleteMarker value not True in response' + assert 'VersionId' in resp, 'VersionId key not present in response' + version_id = resp['VersionId'] + bucket.Object(key).delete() + connection.ObjectVersion(bucket.name, key, version_id).delete() + # bucket index should now be empty + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME}') + json_out = json.loads(out.replace(b'\x80', b'0x80')) + assert len(json_out) == 0, 'bucket index was not empty after all objects were deleted' + + (_out, ret) = exec_cmd(f'rados -p {DATA_POOL} ls | grep {key}', check_retcode=False) + assert ret != 0, 'olh object was not cleaned up' + + # TESTCASE 'verify that index entries and OLH objects are cleaned up after index linking error' + log.debug('TEST: verify that index entries and OLH objects are cleaned up after index linking error\n') + key = str(uuid.uuid4()) + try: + exec_cmd('ceph config set client rgw_debug_inject_set_olh_err 2') + time.sleep(1) + bucket.Object(key).delete() + finally: + exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME}') + json_out = json.loads(out.replace(b'\x80', b'0x80')) + assert len(json_out) == 0, 'bucket index was not empty after op failed' + (_out, ret) = exec_cmd(f'rados -p {DATA_POOL} ls | grep {key}', check_retcode=False) + assert ret != 0, 'olh object was not cleaned up' + + # TESTCASE 'verify that original null object version is intact after failed olh upgrade' + log.debug('TEST: verify that original null object version is intact after failed olh upgrade\n') + connection.BucketVersioning(BUCKET_NAME).suspend() + key = str(uuid.uuid4()) + put_resp = bucket.put_object(Key=key, Body=b"data") + connection.BucketVersioning(BUCKET_NAME).enable() + try: + exec_cmd('ceph config set client rgw_debug_inject_set_olh_err 2') + time.sleep(1) + # expected to fail due to the above error injection + bucket.put_object(Key=key, Body=b"new data") + except Exception as e: + log.debug(e) + finally: + exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') + get_resp = bucket.Object(key).get() + assert put_resp.e_tag == 
get_resp['ETag'], 'get did not return null version with correct etag' + + # Clean up + log.debug("Deleting bucket {}".format(BUCKET_NAME)) + bucket.object_versions.all().delete() + bucket.delete() + +main() +log.info("Completed bucket versioning tests") diff --git a/qa/workunits/suites/blogbench.sh b/qa/workunits/suites/blogbench.sh new file mode 100755 index 000000000..a05d8d21c --- /dev/null +++ b/qa/workunits/suites/blogbench.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -e + +echo "getting blogbench" +wget http://download.ceph.com/qa/blogbench-1.0.tar.bz2 +#cp /home/gregf/src/blogbench-1.0.tar.bz2 . +tar -xvf blogbench-1.0.tar.bz2 +cd blogbench-1.0/ +echo "making blogbench" +./configure +make +cd src +mkdir blogtest_in +echo "running blogbench" +./blogbench -d blogtest_in diff --git a/qa/workunits/suites/bonnie.sh b/qa/workunits/suites/bonnie.sh new file mode 100755 index 000000000..b60cc6a5e --- /dev/null +++ b/qa/workunits/suites/bonnie.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +bonnie_bin=`which bonnie++` +[ $? -eq 1 ] && bonnie_bin=/usr/sbin/bonnie++ + +uid_flags="" +[ "`id -u`" == "0" ] && uid_flags="-u root" + +$bonnie_bin $uid_flags -n 100 diff --git a/qa/workunits/suites/cephfs_journal_tool_smoke.sh b/qa/workunits/suites/cephfs_journal_tool_smoke.sh new file mode 100755 index 000000000..3fe01ed63 --- /dev/null +++ b/qa/workunits/suites/cephfs_journal_tool_smoke.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +set -e +set -x + +export BIN="${BIN:-cephfs-journal-tool --rank=cephfs:0}" +export JOURNAL_FILE=/tmp/journal.bin +export JSON_OUTPUT=/tmp/json.tmp +export BINARY_OUTPUT=/tmp/binary.tmp + +if [ -d $BINARY_OUTPUT ] ; then + rm -rf $BINARY_OUTPUT +fi + +# Check that the import/export stuff really works as expected +# first because it's used as the reset method between +# following checks. +echo "Testing that export/import cycle preserves state" +HEADER_STATE=`$BIN header get` +EVENT_LIST=`$BIN event get list` +$BIN journal export $JOURNAL_FILE +$BIN journal import $JOURNAL_FILE +NEW_HEADER_STATE=`$BIN header get` +NEW_EVENT_LIST=`$BIN event get list` + +if [ ! "$HEADER_STATE" = "$NEW_HEADER_STATE" ] ; then + echo "Import failed to preserve header state" + echo $HEADER_STATE + echo $NEW_HEADER_STATE + exit -1 +fi + +if [ ! "$EVENT_LIST" = "$NEW_EVENT_LIST" ] ; then + echo "Import failed to preserve event state" + echo $EVENT_LIST + echo $NEW_EVENT_LIST + exit -1 +fi + +echo "Testing 'journal' commands..." + +# Simplest thing: print the vital statistics of the journal +$BIN journal inspect +$BIN header get + +# Make a copy of the journal in its original state +$BIN journal export $JOURNAL_FILE +if [ ! -s $JOURNAL_FILE ] ; then + echo "Export to $JOURNAL_FILE failed" + exit -1 +fi + +# Can we execute a journal reset? +$BIN journal reset +$BIN journal inspect +$BIN header get + +echo "Rolling back journal to original state..." +$BIN journal import $JOURNAL_FILE + +echo "Testing 'header' commands..." +$BIN header get +$BIN header set write_pos 123 +$BIN header set expire_pos 123 +$BIN header set trimmed_pos 123 + +echo "Rolling back journal to original state..." +$BIN journal import $JOURNAL_FILE + +echo "Testing 'event' commands..." +$BIN event get summary +$BIN event get --type=UPDATE --path=/ --inode=0 --frag=0x100 summary +$BIN event get json --path $JSON_OUTPUT +if [ ! -s $JSON_OUTPUT ] ; then + echo "Export to $JSON_OUTPUT failed" + exit -1 +fi +$BIN event get binary --path $BINARY_OUTPUT +if [ ! 
-s $BINARY_OUTPUT ] ; then + echo "Export to $BINARY_OUTPUT failed" + exit -1 +fi +$BIN event recover_dentries summary +$BIN event splice summary + +# Tests finish. +# Metadata objects have been modified by the 'event recover_dentries' command. +# Journal is no long consistent with respect to metadata objects (especially inotable). +# To ensure mds successfully replays its journal, we need to do journal reset. +$BIN journal reset +cephfs-table-tool all reset session + diff --git a/qa/workunits/suites/dbench-short.sh b/qa/workunits/suites/dbench-short.sh new file mode 100755 index 000000000..b0da02275 --- /dev/null +++ b/qa/workunits/suites/dbench-short.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -e + +dbench 1 diff --git a/qa/workunits/suites/dbench.sh b/qa/workunits/suites/dbench.sh new file mode 100755 index 000000000..32c893b45 --- /dev/null +++ b/qa/workunits/suites/dbench.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e + +dbench 1 +dbench 10 diff --git a/qa/workunits/suites/ffsb.sh b/qa/workunits/suites/ffsb.sh new file mode 100755 index 000000000..bf95a05c4 --- /dev/null +++ b/qa/workunits/suites/ffsb.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -e + +mydir=`dirname $0` + +# try it again if the clone is slow and the second time +trap -- 'retry' EXIT +retry() { + rm -rf ffsb + # double the timeout value + timeout 3600 git clone https://git.ceph.com/ffsb.git --depth 1 +} +rm -rf ffsb +timeout 1800 git clone https://git.ceph.com/ffsb.git --depth 1 +trap - EXIT + +cd ffsb +./configure +make +cd .. +mkdir tmp +cd tmp + +for f in $mydir/*.ffsb +do + ../ffsb/ffsb $f +done +cd .. +rm -r tmp ffsb* + diff --git a/qa/workunits/suites/fio.sh b/qa/workunits/suites/fio.sh new file mode 100755 index 000000000..ee69de81c --- /dev/null +++ b/qa/workunits/suites/fio.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -x + +gen_fio_file() { + iter=$1 + f=$2 + cat > randio-$$-${iter}.fio <<EOF +[randio] +blocksize_range=32m:128m +blocksize_unaligned=1 +filesize=10G:20G +readwrite=randrw +runtime=300 +size=20G +filename=${f} +EOF +} + +sudo apt-get -y install fio +for i in $(seq 1 20); do + fcount=$(ls donetestfile* 2>/dev/null | wc -l) + donef="foo" + fiof="bar" + if test ${fcount} -gt 0; then + # choose random file + r=$[ ${RANDOM} % ${fcount} ] + testfiles=( $(ls donetestfile*) ) + donef=${testfiles[${r}]} + fiof=$(echo ${donef} | sed -e "s|done|fio|") + gen_fio_file $i ${fiof} + else + fiof=fiotestfile.$$.$i + donef=donetestfile.$$.$i + gen_fio_file $i ${fiof} + fi + + sudo rm -f ${donef} + sudo fio randio-$$-$i.fio + sudo ln ${fiof} ${donef} + ls -la +done diff --git a/qa/workunits/suites/fsstress.sh b/qa/workunits/suites/fsstress.sh new file mode 100755 index 000000000..e5da5b439 --- /dev/null +++ b/qa/workunits/suites/fsstress.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -ex + +mkdir -p fsstress +pushd fsstress +wget -q -O ltp-full.tgz http://download.ceph.com/qa/ltp-full-20091231.tgz +tar xzf ltp-full.tgz +pushd ltp-full-20091231/testcases/kernel/fs/fsstress +make +BIN=$(readlink -f fsstress) +popd +popd + +T=$(mktemp -d -p .) +"$BIN" -d "$T" -l 1 -n 1000 -p 10 -v +rm -rf -- "$T" diff --git a/qa/workunits/suites/fsx.sh b/qa/workunits/suites/fsx.sh new file mode 100755 index 000000000..0d5ba3a58 --- /dev/null +++ b/qa/workunits/suites/fsx.sh @@ -0,0 +1,16 @@ +#!/bin/sh -x + +set -e + +git clone https://git.ceph.com/xfstests-dev.git +cd xfstests-dev +git checkout 12973fc04fd10d4af086901e10ffa8e48866b735 +make -j4 +cd .. +cp xfstests-dev/ltp/fsx . 
+ +OPTIONS="-z" # don't use zero range calls; not supported by cephfs + +./fsx $OPTIONS 1MB -N 50000 -p 10000 -l 1048576 +./fsx $OPTIONS 10MB -N 50000 -p 10000 -l 10485760 +./fsx $OPTIONS 100MB -N 50000 -p 10000 -l 104857600 diff --git a/qa/workunits/suites/fsync-tester.sh b/qa/workunits/suites/fsync-tester.sh new file mode 100755 index 000000000..6e32786ea --- /dev/null +++ b/qa/workunits/suites/fsync-tester.sh @@ -0,0 +1,13 @@ +#!/bin/sh -x + +set -e + +# To skirt around GPL compatibility issues: +wget http://download.ceph.com/qa/fsync-tester.c +gcc -D_GNU_SOURCE fsync-tester.c -o fsync-tester + +./fsync-tester + +echo $PATH +whereis lsof +lsof diff --git a/qa/workunits/suites/iogen.sh b/qa/workunits/suites/iogen.sh new file mode 100755 index 000000000..d92b87083 --- /dev/null +++ b/qa/workunits/suites/iogen.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -e + +echo "getting iogen" +wget http://download.ceph.com/qa/iogen_3.1p0.tar +tar -xvzf iogen_3.1p0.tar +cd iogen_3.1p0 +echo "making iogen" +make +echo "running iogen" +./iogen -n 5 -s 2g +echo "sleep for 10 min" +sleep 600 +echo "stopping iogen" +./iogen -k + +echo "OK" diff --git a/qa/workunits/suites/iozone-sync.sh b/qa/workunits/suites/iozone-sync.sh new file mode 100755 index 000000000..a37962d30 --- /dev/null +++ b/qa/workunits/suites/iozone-sync.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -e + +# basic tests of O_SYNC, O_DSYNC, O_RSYNC +# test O_SYNC +iozone -c -e -s 512M -r 1M -t 1 -F osync1 -i 0 -i 1 -o +# test O_DSYNC +iozone -c -e -s 512M -r 1M -t 1 -F odsync1 -i 0 -i 1 -+D +# test O_RSYNC +iozone -c -e -s 512M -r 1M -t 1 -F orsync1 -i 0 -i 1 -+r + +# test same file with O_SYNC in one process, buffered in the other +# the sync test starts first, so the buffered test should blow +# past it and +iozone -c -e -s 512M -r 1M -t 1 -F osync2 -i 0 -i 1 -o & +sleep 1 +iozone -c -e -s 512M -r 256K -t 1 -F osync2 -i 0 +wait $! + +# test same file with O_SYNC from different threads +iozone -c -e -s 512M -r 1M -t 2 -F osync3 -i 2 -o diff --git a/qa/workunits/suites/iozone.sh b/qa/workunits/suites/iozone.sh new file mode 100755 index 000000000..7dc50cb0b --- /dev/null +++ b/qa/workunits/suites/iozone.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -ex + +iozone -c -e -s 1024M -r 16K -t 1 -F f1 -i 0 -i 1 +iozone -c -e -s 1024M -r 1M -t 1 -F f2 -i 0 -i 1 +iozone -c -e -s 10240M -r 1M -t 1 -F f3 -i 0 -i 1 diff --git a/qa/workunits/suites/pjd.sh b/qa/workunits/suites/pjd.sh new file mode 100755 index 000000000..bd72f77f2 --- /dev/null +++ b/qa/workunits/suites/pjd.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -e + +wget http://download.ceph.com/qa/pjd-fstest-20090130-RC-aclfixes.tgz +tar zxvf pjd*.tgz +cd pjd-fstest-20090130-RC +make clean +make +cd .. +mkdir tmp +cd tmp +# must be root! +sudo prove -r -v --exec 'bash -x' ../pjd*/tests +cd .. +rm -rf tmp pjd* + diff --git a/qa/workunits/suites/random_write.32.ffsb b/qa/workunits/suites/random_write.32.ffsb new file mode 100644 index 000000000..ba83e470f --- /dev/null +++ b/qa/workunits/suites/random_write.32.ffsb @@ -0,0 +1,48 @@ +# Large file random writes. +# 1024 files, 100MB per file. + +time=300 # 5 min +alignio=1 + +[filesystem0] + location=. 
+ num_files=128 + min_filesize=104857600 # 100 MB + max_filesize=104857600 + reuse=1 +[end0] + +[threadgroup0] + num_threads=32 + + write_random=1 + write_weight=1 + + write_size=5242880 # 5 MB + write_blocksize=4096 + + [stats] + enable_stats=1 + enable_range=1 + + msec_range 0.00 0.01 + msec_range 0.01 0.02 + msec_range 0.02 0.05 + msec_range 0.05 0.10 + msec_range 0.10 0.20 + msec_range 0.20 0.50 + msec_range 0.50 1.00 + msec_range 1.00 2.00 + msec_range 2.00 5.00 + msec_range 5.00 10.00 + msec_range 10.00 20.00 + msec_range 20.00 50.00 + msec_range 50.00 100.00 + msec_range 100.00 200.00 + msec_range 200.00 500.00 + msec_range 500.00 1000.00 + msec_range 1000.00 2000.00 + msec_range 2000.00 5000.00 + msec_range 5000.00 10000.00 + [end] +[end0] diff --git a/qa/workunits/test_telemetry_pacific.sh b/qa/workunits/test_telemetry_pacific.sh new file mode 100755 index 000000000..a971f5883 --- /dev/null +++ b/qa/workunits/test_telemetry_pacific.sh @@ -0,0 +1,23 @@ +#!/bin/bash -ex + +# Set up ident details for cluster +ceph config set mgr mgr/telemetry/channel_ident true +ceph config set mgr mgr/telemetry/organization 'ceph-qa' +ceph config set mgr mgr/telemetry/description 'upgrade test cluster' + +# Opt-in +ceph telemetry on --license sharing-1-0 + +# Check last_opt_revision +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 3 ]; then + echo "last_opt_revision is incorrect." + exit 1 +fi + +# Check reports +ceph telemetry show +ceph telemetry show-device +ceph telemetry show-all + +echo OK diff --git a/qa/workunits/test_telemetry_pacific_x.sh b/qa/workunits/test_telemetry_pacific_x.sh new file mode 100755 index 000000000..0e4a832db --- /dev/null +++ b/qa/workunits/test_telemetry_pacific_x.sh @@ -0,0 +1,59 @@ +#!/bin/bash -ex + +# Assert that we're still opted in +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 3 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Check the warning: +STATUS=$(ceph -s) +if ! [[ $STATUS == *"Telemetry requires re-opt-in"* ]] +then + echo "STATUS does not contain re-opt-in warning" + exit 1 +fi + +# Check new collections +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." 
+ exit 1 + fi +done + +# Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Opt in to new collections +ceph telemetry on --license sharing-1-0 +ceph telemetry enable channel perf + +# Check the warning: +timeout=60 +STATUS=$(ceph -s) +until [[ $STATUS != *"Telemetry requires re-opt-in"* ]] || [ $timeout -le 0 ]; do + STATUS=$(ceph -s) + sleep 1 + timeout=$(( timeout - 1 )) +done +if [ $timeout -le 0 ]; then + echo "STATUS should not contain re-opt-in warning at this point" + exit 1 +fi + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show + +# Opt out +ceph telemetry off + +echo OK diff --git a/qa/workunits/test_telemetry_quincy.sh b/qa/workunits/test_telemetry_quincy.sh new file mode 100755 index 000000000..e8b07ec13 --- /dev/null +++ b/qa/workunits/test_telemetry_quincy.sh @@ -0,0 +1,44 @@ +#!/bin/bash -ex + +# Set up ident details for cluster +ceph config set mgr mgr/telemetry/channel_ident true +ceph config set mgr mgr/telemetry/organization 'ceph-qa' +ceph config set mgr mgr/telemetry/description 'upgrade test cluster' + + +#Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Assert that new collections are available +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." + exit 1 + fi +done + +# Opt-in +ceph telemetry on --license sharing-1-0 + +# Enable perf channel +ceph telemetry enable channel perf + +# For quincy, the last_opt_revision remains at 1 since last_opt_revision +# was phased out for fresh installs of quincy. +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 1 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show-all + +echo OK diff --git a/qa/workunits/test_telemetry_quincy_x.sh b/qa/workunits/test_telemetry_quincy_x.sh new file mode 100755 index 000000000..4734132d0 --- /dev/null +++ b/qa/workunits/test_telemetry_quincy_x.sh @@ -0,0 +1,40 @@ +#!/bin/bash -ex + +# For quincy, the last_opt_revision remains at 1 since last_opt_revision +# was phased out for fresh installs of quincy. +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 1 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Check the warning: +ceph -s + +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." 
+ exit 1 + fi +done + +#Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Opt in to new collections +# Currently, no new collections between latest quincy and reef (dev) + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show + +# Opt out +ceph telemetry off + +echo OK diff --git a/qa/workunits/true.sh b/qa/workunits/true.sh new file mode 100755 index 000000000..296ef781c --- /dev/null +++ b/qa/workunits/true.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +true diff --git a/qa/workunits/windows/libvirt_vm/autounattend.xml b/qa/workunits/windows/libvirt_vm/autounattend.xml new file mode 100644 index 000000000..c3cdf3171 --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/autounattend.xml @@ -0,0 +1,157 @@ +<?xml version="1.0" encoding="utf-8"?> +<unattend xmlns="urn:schemas-microsoft-com:unattend"> + + <settings pass="windowsPE"> + + <component name="Microsoft-Windows-International-Core-WinPE" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <SetupUILanguage> + <UILanguage>en-US</UILanguage> + </SetupUILanguage> + <SystemLocale>en-US</SystemLocale> + <UILanguage>en-US</UILanguage> + <UserLocale>en-US</UserLocale> + </component> + + <component name="Microsoft-Windows-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + <DiskConfiguration> + <WillShowUI>OnError</WillShowUI> + <Disk wcm:action="add"> + <CreatePartitions> + <CreatePartition wcm:action="add"> + <Order>1</Order> + <Size>100</Size> + <Type>Primary</Type> + </CreatePartition> + <CreatePartition wcm:action="add"> + <Order>2</Order> + <Extend>true</Extend> + <Type>Primary</Type> + </CreatePartition> + </CreatePartitions> + <ModifyPartitions> + <ModifyPartition wcm:action="add"> + <Active>true</Active> + <Label>Boot</Label> + <Format>NTFS</Format> + <Order>1</Order> + <PartitionID>1</PartitionID> + </ModifyPartition> + <ModifyPartition wcm:action="add"> + <Format>NTFS</Format> + <Order>2</Order> + <PartitionID>2</PartitionID> + <Label>System</Label> + </ModifyPartition> + </ModifyPartitions> + <DiskID>0</DiskID> + <WillWipeDisk>true</WillWipeDisk> + </Disk> + </DiskConfiguration> + + <ImageInstall> + <OSImage> + <InstallTo> + <PartitionID>2</PartitionID> + <DiskID>0</DiskID> + </InstallTo> + <InstallToAvailablePartition>false</InstallToAvailablePartition> + <WillShowUI>OnError</WillShowUI> + <InstallFrom> + <MetaData wcm:action="add"> + <Key>/IMAGE/NAME</Key> + <Value>Windows Server 2019 SERVERSTANDARDCORE</Value> + </MetaData> + </InstallFrom> + </OSImage> + </ImageInstall> + + <UserData> + <!-- Product Key from http://technet.microsoft.com/en-us/library/jj612867.aspx --> + <ProductKey> + <!-- Do not uncomment the Key element if you are using trial ISOs --> + <!-- You must uncomment the Key element (and optionally insert your own key) if you are using retail or volume license ISOs --> + <!-- <Key></Key> --> + <WillShowUI>OnError</WillShowUI> + </ProductKey> + <AcceptEula>true</AcceptEula> + </UserData> + + </component> + + <component name="Microsoft-Windows-PnpCustomizationsWinPE" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" 
xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <DriverPaths> + <PathAndCredentials wcm:action="add" wcm:keyValue="1"> + <Path>E:\NetKVM\2k19\amd64\</Path> + </PathAndCredentials> + <PathAndCredentials wcm:action="add" wcm:keyValue="2"> + <Path>E:\viostor\2k19\amd64\</Path> + </PathAndCredentials> + <PathAndCredentials wcm:action="add" wcm:keyValue="3"> + <Path>E:\vioserial\2k19\amd64\</Path> + </PathAndCredentials> + </DriverPaths> + </component> + + </settings> + + <settings pass="oobeSystem"> + <component name="Microsoft-Windows-Shell-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + <VisualEffects> + <FontSmoothing>ClearType</FontSmoothing> + </VisualEffects> + + <UserAccounts> + <!-- + Password to be used only during initial provisioning. + Must be reset with final Sysprep. + --> + <AdministratorPassword> + <Value>Passw0rd</Value> + <PlainText>true</PlainText> + </AdministratorPassword> + </UserAccounts> + + <AutoLogon> + <Password> + <Value>Passw0rd</Value> + <PlainText>true</PlainText> + </Password> + <Enabled>true</Enabled> + <Username>Administrator</Username> + </AutoLogon> + + <ComputerName>*</ComputerName> + + <OOBE> + <NetworkLocation>Work</NetworkLocation> + <HideEULAPage>true</HideEULAPage> + <ProtectYourPC>3</ProtectYourPC> + <SkipMachineOOBE>true</SkipMachineOOBE> + <SkipUserOOBE>true</SkipUserOOBE> + </OOBE> + + <FirstLogonCommands> + + <SynchronousCommand wcm:action="add"> + <CommandLine>%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell -NoLogo -NonInteractive -ExecutionPolicy RemoteSigned -File A:\first-logon.ps1</CommandLine> + <Order>1</Order> + </SynchronousCommand> + + </FirstLogonCommands> + + </component> + + </settings> + + <settings pass="specialize"> + + <component name="Microsoft-Windows-Shell-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <TimeZone>UTC</TimeZone> + <ComputerName>*</ComputerName> + </component> + + </settings> + +</unattend> diff --git a/qa/workunits/windows/libvirt_vm/first-logon.ps1 b/qa/workunits/windows/libvirt_vm/first-logon.ps1 new file mode 100644 index 000000000..654b836bb --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/first-logon.ps1 @@ -0,0 +1,42 @@ +$ErrorActionPreference = "Stop" + +. "${PSScriptRoot}\utils.ps1" + +$VIRTIO_WIN_PATH = "E:\" + +# Install QEMU quest agent +Write-Output "Installing QEMU guest agent" +$p = Start-Process -FilePath "msiexec.exe" -ArgumentList @("/i", "${VIRTIO_WIN_PATH}\guest-agent\qemu-ga-x86_64.msi", "/qn") -NoNewWindow -PassThru -Wait +if($p.ExitCode) { + Throw "The QEMU guest agent installation failed. 
Exit code: $($p.ExitCode)" +} +Write-Output "Successfully installed QEMU guest agent" + +# Install OpenSSH server +Start-ExecuteWithRetry { + Get-WindowsCapability -Online -Name OpenSSH* | Add-WindowsCapability -Online +} + +# Start OpenSSH server +Set-Service -Name "sshd" -StartupType Automatic +Start-Service -Name "sshd" + +# Set PowerShell as default SSH shell +New-ItemProperty -PropertyType String -Force -Name DefaultShell -Path "HKLM:\SOFTWARE\OpenSSH" -Value (Get-Command powershell.exe).Source + +# Create SSH firewall rule +New-NetFirewallRule -Name "sshd" -DisplayName 'OpenSSH Server (sshd)' -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 + +# Authorize the SSH key +$authorizedKeysFile = Join-Path $env:ProgramData "ssh\administrators_authorized_keys" +Set-Content -Path $authorizedKeysFile -Value (Get-Content "${PSScriptRoot}\id_rsa.pub") -Encoding ascii +$acl = Get-Acl $authorizedKeysFile +$acl.SetAccessRuleProtection($true, $false) +$administratorsRule = New-Object system.security.accesscontrol.filesystemaccessrule("Administrators", "FullControl", "Allow") +$systemRule = New-Object system.security.accesscontrol.filesystemaccessrule("SYSTEM", "FullControl", "Allow") +$acl.SetAccessRule($administratorsRule) +$acl.SetAccessRule($systemRule) +$acl | Set-Acl + +# Reboot the machine to complete first logon process +Restart-Computer -Force -Confirm:$false diff --git a/qa/workunits/windows/libvirt_vm/setup.ps1 b/qa/workunits/windows/libvirt_vm/setup.ps1 new file mode 100644 index 000000000..550fb274e --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/setup.ps1 @@ -0,0 +1,43 @@ +$ErrorActionPreference = "Stop" +$ProgressPreference = "SilentlyContinue" + +$PYTHON3_URL = "https://www.python.org/ftp/python/3.10.4/python-3.10.4-amd64.exe" +$FIO_URL = "https://bsdio.com/fio/releases/fio-3.27-x64.msi" +$VC_REDIST_URL = "https://aka.ms/vs/17/release/vc_redist.x64.exe" + +. 
"${PSScriptRoot}\utils.ps1" + +function Install-VCRedist { + Write-Output "Installing Visual Studio Redistributable x64" + Install-Tool -URL $VC_REDIST_URL -Params @("/quiet", "/norestart") + Write-Output "Successfully installed Visual Studio Redistributable x64" +} + +function Install-Python3 { + Write-Output "Installing Python3" + Install-Tool -URL $PYTHON3_URL -Params @("/quiet", "InstallAllUsers=1", "PrependPath=1") + Add-ToPathEnvVar -Path @("${env:ProgramFiles}\Python310\", "${env:ProgramFiles}\Python310\Scripts\") + Write-Output "Installing pip dependencies" + Start-ExecuteWithRetry { + Invoke-CommandLine "pip3.exe" "install prettytable" + } + Write-Output "Successfully installed Python3" +} + +function Install-FIO { + Write-Output "Installing FIO" + Install-Tool -URL $FIO_URL -Params @("/qn", "/l*v", "$env:TEMP\fio-install.log", "/norestart") + Write-Output "Successfully installed FIO" +} + +Install-VCRedist +Install-Python3 +Install-FIO + +# Pre-append WNBD and Ceph to PATH +Add-ToPathEnvVar -Path @( + "${env:SystemDrive}\wnbd\binaries", + "${env:SystemDrive}\ceph") + +# This will refresh the PATH for new SSH sessions +Restart-Service -Force -Name "sshd" diff --git a/qa/workunits/windows/libvirt_vm/setup.sh b/qa/workunits/windows/libvirt_vm/setup.sh new file mode 100755 index 000000000..51e91ec42 --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/setup.sh @@ -0,0 +1,162 @@ +#!/usr/bin/env bash +set -ex + +WINDOWS_SERVER_2019_ISO_URL=${WINDOWS_SERVER_2019_ISO_URL:-"https://software-download.microsoft.com/download/pr/17763.737.190906-2324.rs5_release_svc_refresh_SERVER_EVAL_x64FRE_en-us_1.iso"} +VIRTIO_WIN_ISO_URL=${VIRTIO_WIN_ISO_URL:-"https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/stable-virtio/virtio-win.iso"} + +DIR="$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)" + +# Use build_utils.sh from ceph-build +curl --retry-max-time 30 --retry 10 -L -o ${DIR}/build_utils.sh https://raw.githubusercontent.com/ceph/ceph-build/main/scripts/build_utils.sh +source ${DIR}/build_utils.sh + +# Helper function to restart the Windows VM +function restart_windows_vm() { + echo "Restarting Windows VM" + ssh_exec "cmd.exe /c 'shutdown.exe /r /t 0 & sc.exe stop sshd'" + SECONDS=0 + TIMEOUT=${1:-600} + while true; do + if [[ $SECONDS -gt $TIMEOUT ]]; then + echo "Timeout waiting for the VM to start" + exit 1 + fi + ssh_exec hostname || { + echo "Cannot execute SSH commands yet" + sleep 10 + continue + } + break + done + echo "Windows VM restarted" +} + +# Install libvirt with KVM +retrycmd_if_failure 5 0 5m sudo apt-get update +retrycmd_if_failure 5 0 10m sudo apt-get install -y qemu-kvm libvirt-daemon-system libvirt-clients virtinst + +# Download ISO images +echo "Downloading virtio-win ISO" +retrycmd_if_failure 5 0 30m curl -C - -L $VIRTIO_WIN_ISO_URL -o ${DIR}/virtio-win.iso +echo "Downloading Windows Server 2019 ISO" +retrycmd_if_failure 5 0 60m curl -C - -L $WINDOWS_SERVER_2019_ISO_URL -o ${DIR}/windows-server-2019.iso + +# Create virtual floppy image with the unattended instructions to install Windows Server 2019 +echo "Creating floppy image" +qemu-img create -f raw ${DIR}/floppy.img 1440k +mkfs.msdos -s 1 ${DIR}/floppy.img +mkdir ${DIR}/floppy +sudo mount ${DIR}/floppy.img ${DIR}/floppy +ssh-keygen -b 2048 -t rsa -f ${DIR}/id_rsa -q -N "" +sudo cp \ + ${DIR}/autounattend.xml \ + ${DIR}/first-logon.ps1 \ + ${DIR}/id_rsa.pub \ + ${DIR}/utils.ps1 \ + ${DIR}/setup.ps1 \ + ${DIR}/floppy/ +sudo umount ${DIR}/floppy +rmdir ${DIR}/floppy + +echo "Starting libvirt VM" +qemu-img create -f 
qcow2 ${DIR}/ceph-win-ltsc2019.qcow2 50G +VM_NAME="ceph-win-ltsc2019" +sudo virt-install \ + --name $VM_NAME \ + --os-variant win2k19 \ + --boot hd,cdrom \ + --virt-type kvm \ + --graphics spice \ + --cpu host \ + --vcpus 4 \ + --memory 4096 \ + --disk ${DIR}/floppy.img,device=floppy \ + --disk ${DIR}/ceph-win-ltsc2019.qcow2,bus=virtio \ + --disk ${DIR}/windows-server-2019.iso,device=cdrom \ + --disk ${DIR}/virtio-win.iso,device=cdrom \ + --network network=default,model=virtio \ + --controller type=virtio-serial \ + --channel unix,target_type=virtio,name=org.qemu.guest_agent.0 \ + --noautoconsol + +export SSH_USER="administrator" +export SSH_KNOWN_HOSTS_FILE="${DIR}/known_hosts" +export SSH_KEY="${DIR}/id_rsa" + +SECONDS=0 +TIMEOUT=1800 +SLEEP_SECS=30 +while true; do + if [[ $SECONDS -gt $TIMEOUT ]]; then + echo "Timeout waiting for the VM to start" + exit 1 + fi + VM_IP=$(sudo virsh domifaddr --source agent --interface Ethernet --full $VM_NAME | grep ipv4 | awk '{print $4}' | cut -d '/' -f1) || { + echo "Retrying in $SLEEP_SECS seconds" + sleep $SLEEP_SECS + continue + } + ssh-keyscan -H $VM_IP &> $SSH_KNOWN_HOSTS_FILE || { + echo "SSH is not reachable yet" + sleep $SLEEP_SECS + continue + } + SSH_ADDRESS=$VM_IP ssh_exec hostname || { + echo "Cannot execute SSH commands yet" + sleep $SLEEP_SECS + continue + } + break +done +export SSH_ADDRESS=$VM_IP + +scp_upload ${DIR}/utils.ps1 /utils.ps1 +scp_upload ${DIR}/setup.ps1 /setup.ps1 +SSH_TIMEOUT=1h ssh_exec /setup.ps1 + +cd $DIR + +# Get the helper script to download Chacra builds +retrycmd_if_failure 10 5 1m curl -L -o ./get-chacra-bin.py https://raw.githubusercontent.com/ceph/ceph-win32-tests/main/get-bin.py +chmod +x ./get-chacra-bin.py + +# Download latest WNBD build from Chacra +retrycmd_if_failure 10 0 10m ./get-chacra-bin.py --project wnbd --filename wnbd.zip +scp_upload wnbd.zip /wnbd.zip +ssh_exec tar.exe xzvf /wnbd.zip -C / + +# Install WNBD driver +ssh_exec Import-Certificate -FilePath /wnbd/driver/wnbd.cer -Cert Cert:\\LocalMachine\\Root +ssh_exec Import-Certificate -FilePath /wnbd/driver/wnbd.cer -Cert Cert:\\LocalMachine\\TrustedPublisher +ssh_exec /wnbd/binaries/wnbd-client.exe install-driver /wnbd/driver/wnbd.inf +restart_windows_vm +ssh_exec wnbd-client.exe -v + +# Download Ceph Windows build from Chacra +CEPH_REPO_FILE="/etc/apt/sources.list.d/ceph.list" +PROJECT=$(cat $CEPH_REPO_FILE | cut -d ' ' -f3 | tr '\/', ' ' | awk '{print $4}') +BRANCH=$(cat $CEPH_REPO_FILE | cut -d ' ' -f3 | tr '\/', ' ' | awk '{print $5}') +SHA1=$(cat $CEPH_REPO_FILE | cut -d ' ' -f3 | tr '\/', ' ' | awk '{print $6}') +retrycmd_if_failure 10 0 10m ./get-chacra-bin.py --project $PROJECT --branchname $BRANCH --sha1 $SHA1 --filename ceph.zip + +# Install Ceph on Windows +SSH_TIMEOUT=5m scp_upload ./ceph.zip /ceph.zip +SSH_TIMEOUT=10m ssh_exec tar.exe xzvf /ceph.zip -C / +ssh_exec "New-Service -Name ceph-rbd -BinaryPathName 'c:\ceph\rbd-wnbd.exe service'" +ssh_exec Start-Service -Name ceph-rbd +ssh_exec rbd.exe -v + +# Setup Ceph configs and directories +ssh_exec mkdir -force /etc/ceph, /var/run/ceph, /var/log/ceph +for i in $(ls /etc/ceph); do + scp_upload /etc/ceph/$i /etc/ceph/$i +done + +cat << EOF > ${DIR}/connection_info.sh +export SSH_USER="${SSH_USER}" +export SSH_KNOWN_HOSTS_FILE="${SSH_KNOWN_HOSTS_FILE}" +export SSH_KEY="${SSH_KEY}" +export SSH_ADDRESS="${SSH_ADDRESS}" +EOF + +echo "Windows Server 2019 libvirt testing VM is ready" diff --git a/qa/workunits/windows/libvirt_vm/utils.ps1 b/qa/workunits/windows/libvirt_vm/utils.ps1 new file mode 
100644 index 000000000..f29ab79f5 --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/utils.ps1 @@ -0,0 +1,130 @@ +function Invoke-CommandLine { + Param( + [Parameter(Mandatory=$true)] + [String]$Command, + [String]$Arguments, + [Int[]]$AllowedExitCodes=@(0) + ) + & $Command $Arguments.Split(" ") + if($LASTEXITCODE -notin $AllowedExitCodes) { + Throw "$Command $Arguments returned a non zero exit code ${LASTEXITCODE}." + } +} + +function Start-ExecuteWithRetry { + Param( + [Parameter(Mandatory=$true)] + [ScriptBlock]$ScriptBlock, + [Int]$MaxRetryCount=10, + [Int]$RetryInterval=3, + [String]$RetryMessage, + [Array]$ArgumentList=@() + ) + $currentErrorActionPreference = $ErrorActionPreference + $ErrorActionPreference = "Continue" + $retryCount = 0 + while ($true) { + try { + $res = Invoke-Command -ScriptBlock $ScriptBlock -ArgumentList $ArgumentList + $ErrorActionPreference = $currentErrorActionPreference + return $res + } catch [System.Exception] { + $retryCount++ + if ($retryCount -gt $MaxRetryCount) { + $ErrorActionPreference = $currentErrorActionPreference + Throw $_ + } else { + $prefixMsg = "Retry(${retryCount}/${MaxRetryCount})" + if($RetryMessage) { + Write-Host "${prefixMsg} - $RetryMessage" + } elseif($_) { + Write-Host "${prefixMsg} - $($_.ToString())" + } + Start-Sleep $RetryInterval + } + } + } +} + +function Start-FileDownload { + Param( + [Parameter(Mandatory=$true)] + [String]$URL, + [Parameter(Mandatory=$true)] + [String]$Destination, + [Int]$RetryCount=10 + ) + Write-Output "Downloading $URL to $Destination" + Start-ExecuteWithRetry ` + -ScriptBlock { Invoke-CommandLine -Command "curl.exe" -Arguments "-L -s -o $Destination $URL" } ` + -MaxRetryCount $RetryCount ` + -RetryMessage "Failed to download '${URL}'. Retrying" + Write-Output "Successfully downloaded." 
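+    # Note: curl.exe is used here because it ships with Windows Server 2019, so no separate download tool has to be installed first.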
+} + +function Add-ToPathEnvVar { + Param( + [Parameter(Mandatory=$true)] + [String[]]$Path, + [Parameter(Mandatory=$false)] + [ValidateSet([System.EnvironmentVariableTarget]::User, [System.EnvironmentVariableTarget]::Machine)] + [System.EnvironmentVariableTarget]$Target=[System.EnvironmentVariableTarget]::Machine + ) + $pathEnvVar = [Environment]::GetEnvironmentVariable("PATH", $Target).Split(';') + $currentSessionPath = $env:PATH.Split(';') + foreach($p in $Path) { + if($p -notin $pathEnvVar) { + $pathEnvVar += $p + } + if($p -notin $currentSessionPath) { + $currentSessionPath += $p + } + } + $env:PATH = $currentSessionPath -join ';' + $newPathEnvVar = $pathEnvVar -join ';' + [Environment]::SetEnvironmentVariable("PATH", $newPathEnvVar, $Target) +} + +function Install-Tool { + [CmdletBinding(DefaultParameterSetName = "URL")] + Param( + [Parameter(Mandatory=$true, ParameterSetName = "URL")] + [String]$URL, + [Parameter(Mandatory=$true, ParameterSetName = "LocalPath")] + [String]$LocalPath, + [Parameter(ParameterSetName = "URL")] + [Parameter(ParameterSetName = "LocalPath")] + [String[]]$Params=@(), + [Parameter(ParameterSetName = "URL")] + [Parameter(ParameterSetName = "LocalPath")] + [Int[]]$AllowedExitCodes=@(0) + ) + PROCESS { + $installerPath = $LocalPath + if($PSCmdlet.ParameterSetName -eq "URL") { + $installerPath = Join-Path $env:TEMP $URL.Split('/')[-1] + Start-FileDownload -URL $URL -Destination $installerPath + } + Write-Output "Installing ${installerPath}" + $kwargs = @{ + "FilePath" = $installerPath + "ArgumentList" = $Params + "NoNewWindow" = $true + "PassThru" = $true + "Wait" = $true + } + if((Get-ChildItem $installerPath).Extension -eq '.msi') { + $kwargs["FilePath"] = "msiexec.exe" + $kwargs["ArgumentList"] = @("/i", $installerPath) + $Params + } + $p = Start-Process @kwargs + if($p.ExitCode -notin $AllowedExitCodes) { + Throw "Installation failed. Exit code: $($p.ExitCode)" + } + if($PSCmdlet.ParameterSetName -eq "URL") { + Start-ExecuteWithRetry ` + -ScriptBlock { Remove-Item -Force -Path $installerPath -ErrorAction Stop } ` + -RetryMessage "Failed to remove ${installerPath}. Retrying" + } + } +} diff --git a/qa/workunits/windows/run-tests.ps1 b/qa/workunits/windows/run-tests.ps1 new file mode 100644 index 000000000..6d818f426 --- /dev/null +++ b/qa/workunits/windows/run-tests.ps1 @@ -0,0 +1,29 @@ +$ProgressPreference = "SilentlyContinue" +$ErrorActionPreference = "Stop" + +$scriptLocation = [System.IO.Path]::GetDirectoryName( + $myInvocation.MyCommand.Definition) + +$testRbdWnbd = "$scriptLocation/test_rbd_wnbd.py" + +function safe_exec() { + # Powershell doesn't check the command exit code, we'll need to + # do it ourselves. Also, in case of native commands, it treats stderr + # output as an exception, which is why we'll have to capture it. + cmd /c "$args 2>&1" + if ($LASTEXITCODE) { + throw "Command failed: $args" + } +} + +safe_exec python.exe $testRbdWnbd --test-name RbdTest --iterations 100 +safe_exec python.exe $testRbdWnbd --test-name RbdFioTest --iterations 100 +safe_exec python.exe $testRbdWnbd --test-name RbdStampTest --iterations 100 + +# It can take a while to setup the partition (~10s), we'll use fewer iterations. 
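+# The Rbd*Fs* tests initialize the disk, create a partition and format a volume for every image, which is what accounts for the extra setup time.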
+safe_exec python.exe $testRbdWnbd --test-name RbdFsTest --iterations 4 +safe_exec python.exe $testRbdWnbd --test-name RbdFsFioTest --iterations 4 +safe_exec python.exe $testRbdWnbd --test-name RbdFsStampTest --iterations 4 + +safe_exec python.exe $testRbdWnbd ` + --test-name RbdResizeFioTest --image-size-mb 64 diff --git a/qa/workunits/windows/run-tests.sh b/qa/workunits/windows/run-tests.sh new file mode 100644 index 000000000..b582491c5 --- /dev/null +++ b/qa/workunits/windows/run-tests.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +set -ex + +DIR="$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)" + +source ${DIR}/libvirt_vm/build_utils.sh +source ${DIR}/libvirt_vm/connection_info.sh + +# Run the Windows tests +scp_upload ${DIR} /windows-workunits +SSH_TIMEOUT=30m ssh_exec powershell.exe -File /windows-workunits/run-tests.ps1 diff --git a/qa/workunits/windows/test_rbd_wnbd.py b/qa/workunits/windows/test_rbd_wnbd.py new file mode 100644 index 000000000..f22810e2e --- /dev/null +++ b/qa/workunits/windows/test_rbd_wnbd.py @@ -0,0 +1,919 @@ +import argparse +import collections +import functools +import json +import logging +import math +import os +import prettytable +import random +import subprocess +import time +import threading +import typing +import uuid +from concurrent import futures + +LOG = logging.getLogger() + +parser = argparse.ArgumentParser(description='rbd-wnbd tests') +parser.add_argument('--test-name', + help='The test to be run.', + default="RbdFioTest") +parser.add_argument('--iterations', + help='Total number of test iterations', + default=1, type=int) +parser.add_argument('--concurrency', + help='The number of tests to run in parallel', + default=4, type=int) +parser.add_argument('--fio-iterations', + help='Total number of benchmark iterations per disk.', + default=1, type=int) +parser.add_argument('--fio-workers', + help='Total number of fio workers per disk.', + default=1, type=int) +parser.add_argument('--fio-depth', + help='The number of concurrent asynchronous operations ' + 'executed per disk', + default=64, type=int) +parser.add_argument('--fio-verify', + help='The mechanism used to validate the written ' + 'data. Examples: crc32c, md5, sha1, null, etc. ' + 'If set to null, the written data will not be ' + 'verified.', + default='crc32c') +parser.add_argument('--bs', + help='Benchmark block size.', + default="2M") +parser.add_argument('--op', + help='Benchmark operation. ' + 'Examples: read, randwrite, rw, etc.', + default="rw") +parser.add_argument('--image-prefix', + help='The image name prefix.', + default="cephTest-") +parser.add_argument('--image-size-mb', + help='The image size in megabytes.', + default=1024, type=int) +parser.add_argument('--map-timeout', + help='Image map timeout.', + default=60, type=int) +parser.add_argument('--skip-enabling-disk', action='store_true', + help='If set, the disk will not be turned online and the ' + 'read-only flag will not be removed. Useful when ' + 'the SAN policy is set to "onlineAll".') +parser.add_argument('--verbose', action='store_true', + help='Print info messages.') +parser.add_argument('--debug', action='store_true', + help='Print debug messages.') +parser.add_argument('--stop-on-error', action='store_true', + help='Stop testing when hitting errors.') +parser.add_argument('--skip-cleanup-on-error', action='store_true', + help='Skip cleanup when hitting errors.') + + +class CephTestException(Exception): + msg_fmt = "An exception has been encountered." 
+ + def __init__(self, message: str = None, **kwargs): + self.kwargs = kwargs + if not message: + message = self.msg_fmt % kwargs + self.message = message + super(CephTestException, self).__init__(message) + + +class CommandFailed(CephTestException): + msg_fmt = ( + "Command failed: %(command)s. " + "Return code: %(returncode)s. " + "Stdout: %(stdout)s. Stderr: %(stderr)s.") + + +class CephTestTimeout(CephTestException): + msg_fmt = "Operation timeout." + + +def setup_logging(log_level: int = logging.INFO): + handler = logging.StreamHandler() + handler.setLevel(log_level) + + log_fmt = '[%(asctime)s] %(levelname)s - %(message)s' + formatter = logging.Formatter(log_fmt) + handler.setFormatter(formatter) + + LOG.addHandler(handler) + LOG.setLevel(logging.DEBUG) + + +def retry_decorator(timeout: int = 60, + retry_interval: int = 2, + silent_interval: int = 10, + additional_details: str = "", + retried_exceptions: + typing.Union[ + typing.Type[Exception], + collections.abc.Iterable[ + typing.Type[Exception]]] = Exception): + def wrapper(f: typing.Callable[..., typing.Any]): + @functools.wraps(f) + def inner(*args, **kwargs): + tstart: float = time.time() + elapsed: float = 0 + exc = None + details = additional_details or "%s failed" % f.__qualname__ + + while elapsed < timeout or not timeout: + try: + return f(*args, **kwargs) + except retried_exceptions as ex: + exc = ex + elapsed = time.time() - tstart + if elapsed > silent_interval: + level = logging.WARNING + else: + level = logging.DEBUG + LOG.log(level, + "Exception: %s. Additional details: %s. " + "Time elapsed: %d. Timeout: %d", + ex, details, elapsed, timeout) + + time.sleep(retry_interval) + elapsed = time.time() - tstart + + msg = ( + "Operation timed out. Exception: %s. Additional details: %s. " + "Time elapsed: %d. Timeout: %d.") + raise CephTestTimeout( + msg % (exc, details, elapsed, timeout)) + return inner + return wrapper + + +def execute(*args, **kwargs): + LOG.debug("Executing: %s", args) + result = subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + LOG.debug("Command %s returned %d.", args, result.returncode) + if result.returncode: + exc = CommandFailed( + command=args, returncode=result.returncode, + stdout=result.stdout, stderr=result.stderr) + LOG.error(exc) + raise exc + return result + + +def ps_execute(*args, **kwargs): + # Disable PS progress bar, causes issues when invoked remotely. 
+ prefix = "$global:ProgressPreference = 'SilentlyContinue' ; " + return execute( + "powershell.exe", "-NonInteractive", + "-Command", prefix, *args, **kwargs) + + +def array_stats(array: list): + mean = sum(array) / len(array) if len(array) else 0 + variance = (sum((i - mean) ** 2 for i in array) / len(array) + if len(array) else 0) + std_dev = math.sqrt(variance) + sorted_array = sorted(array) + + return { + 'min': min(array) if len(array) else 0, + 'max': max(array) if len(array) else 0, + 'sum': sum(array) if len(array) else 0, + 'mean': mean, + 'median': sorted_array[len(array) // 2] if len(array) else 0, + 'max_90': sorted_array[int(len(array) * 0.9)] if len(array) else 0, + 'min_90': sorted_array[int(len(array) * 0.1)] if len(array) else 0, + 'variance': variance, + 'std_dev': std_dev, + 'count': len(array) + } + + +class Tracer: + data: collections.OrderedDict = collections.OrderedDict() + lock = threading.Lock() + + @classmethod + def trace(cls, func): + def wrapper(*args, **kwargs): + tstart = time.time() + exc_str = None + + # Preserve call order + with cls.lock: + if func.__qualname__ not in cls.data: + cls.data[func.__qualname__] = list() + + try: + return func(*args, **kwargs) + except Exception as exc: + exc_str = str(exc) + raise + finally: + tend = time.time() + + with cls.lock: + cls.data[func.__qualname__] += [{ + "duration": tend - tstart, + "error": exc_str, + }] + + return wrapper + + @classmethod + def get_results(cls): + stats = collections.OrderedDict() + for f in cls.data.keys(): + stats[f] = array_stats([i['duration'] for i in cls.data[f]]) + errors = [] + for i in cls.data[f]: + if i['error']: + errors.append(i['error']) + + stats[f]['errors'] = errors + return stats + + @classmethod + def print_results(cls): + r = cls.get_results() + + table = prettytable.PrettyTable(title="Duration (s)") + table.field_names = [ + "function", "min", "max", "total", + "mean", "median", "std_dev", + "max 90%", "min 90%", "count", "errors"] + table.float_format = ".4" + for f, s in r.items(): + table.add_row([f, s['min'], s['max'], s['sum'], + s['mean'], s['median'], s['std_dev'], + s['max_90'], s['min_90'], + s['count'], len(s['errors'])]) + print(table) + + +class RbdImage(object): + def __init__(self, + name: str, + size_mb: int, + is_shared: bool = True, + disk_number: int = -1, + mapped: bool = False): + self.name = name + self.size_mb = size_mb + self.is_shared = is_shared + self.disk_number = disk_number + self.mapped = mapped + self.removed = False + self.drive_letter = "" + + @classmethod + @Tracer.trace + def create(cls, + name: str, + size_mb: int = 1024, + is_shared: bool = True): + LOG.info("Creating image: %s. 
Size: %s.", name, "%sM" % size_mb) + cmd = ["rbd", "create", name, "--size", "%sM" % size_mb] + if is_shared: + cmd += ["--image-shared"] + execute(*cmd) + + return RbdImage(name, size_mb, is_shared) + + @Tracer.trace + def get_disk_number(self, + timeout: int = 60, + retry_interval: int = 2): + @retry_decorator( + retried_exceptions=CephTestException, + timeout=timeout, + retry_interval=retry_interval) + def _get_disk_number(): + LOG.info("Retrieving disk number: %s", self.name) + + result = execute("rbd-wnbd", "show", self.name, "--format=json") + disk_info = json.loads(result.stdout) + disk_number = disk_info["disk_number"] + if disk_number > 0: + LOG.debug("Image %s disk number: %d", self.name, disk_number) + return disk_number + + raise CephTestException( + f"Could not get disk number: {self.name}.") + + return _get_disk_number() + + @Tracer.trace + def _wait_for_disk(self, + timeout: int = 60, + retry_interval: int = 2): + @retry_decorator( + retried_exceptions=(FileNotFoundError, OSError), + additional_details="the mapped disk isn't available yet", + timeout=timeout, + retry_interval=retry_interval) + def wait_for_disk(): + LOG.debug("Waiting for disk to be accessible: %s %s", + self.name, self.path) + + with open(self.path, 'rb'): + pass + + return wait_for_disk() + + @property + def path(self): + return f"\\\\.\\PhysicalDrive{self.disk_number}" + + @Tracer.trace + @retry_decorator(additional_details="couldn't clear disk read-only flag") + def set_writable(self): + ps_execute( + "Set-Disk", "-Number", str(self.disk_number), + "-IsReadOnly", "$false") + + @Tracer.trace + @retry_decorator(additional_details="couldn't bring the disk online") + def set_online(self): + ps_execute( + "Set-Disk", "-Number", str(self.disk_number), + "-IsOffline", "$false") + + @Tracer.trace + def map(self, timeout: int = 60): + LOG.info("Mapping image: %s", self.name) + tstart = time.time() + + execute("rbd-wnbd", "map", self.name) + self.mapped = True + + self.disk_number = self.get_disk_number(timeout=timeout) + + elapsed = time.time() - tstart + self._wait_for_disk(timeout=timeout - elapsed) + + @Tracer.trace + def unmap(self): + if self.mapped: + LOG.info("Unmapping image: %s", self.name) + execute("rbd-wnbd", "unmap", self.name) + self.mapped = False + + @Tracer.trace + def remove(self): + if not self.removed: + LOG.info("Removing image: %s", self.name) + execute("rbd", "rm", self.name) + self.removed = True + + def cleanup(self): + try: + self.unmap() + finally: + self.remove() + + @Tracer.trace + @retry_decorator() + def _init_disk(self): + cmd = f"Get-Disk -Number {self.disk_number} | Initialize-Disk" + ps_execute(cmd) + + @Tracer.trace + @retry_decorator() + def _create_partition(self): + cmd = (f"Get-Disk -Number {self.disk_number} | " + "New-Partition -AssignDriveLetter -UseMaximumSize") + ps_execute(cmd) + + @Tracer.trace + @retry_decorator() + def _format_volume(self): + cmd = ( + f"(Get-Partition -DiskNumber {self.disk_number}" + " | ? { $_.DriveLetter }) | Format-Volume -Force -Confirm:$false") + ps_execute(cmd) + + @Tracer.trace + @retry_decorator() + def _get_drive_letter(self): + cmd = (f"(Get-Partition -DiskNumber {self.disk_number}" + " | ? { $_.DriveLetter }).DriveLetter") + result = ps_execute(cmd) + + # The PowerShell command will place a null character if no drive letter + # is available. For example, we can receive "\x00\r\n". 
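+        # The checks below reject such output with a CephTestException, which the surrounding retry decorator catches and retries.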
+ self.drive_letter = result.stdout.decode().strip() + if not self.drive_letter.isalpha() or len(self.drive_letter) != 1: + raise CephTestException( + "Invalid drive letter received: %s" % self.drive_letter) + + @Tracer.trace + def init_fs(self): + if not self.mapped: + raise CephTestException("Unable to create fs, image not mapped.") + + LOG.info("Initializing fs, image: %s.", self.name) + + self._init_disk() + self._create_partition() + self._format_volume() + self._get_drive_letter() + + @Tracer.trace + def get_fs_capacity(self): + if not self.drive_letter: + raise CephTestException("No drive letter available") + + cmd = f"(Get-Volume -DriveLetter {self.drive_letter}).Size" + result = ps_execute(cmd) + + return int(result.stdout.decode().strip()) + + @Tracer.trace + def resize(self, new_size_mb, allow_shrink=False): + LOG.info( + "Resizing image: %s. New size: %s MB, old size: %s MB", + self.name, new_size_mb, self.size_mb) + + cmd = ["rbd", "resize", self.name, + "--size", f"{new_size_mb}M", "--no-progress"] + if allow_shrink: + cmd.append("--allow-shrink") + + execute(*cmd) + + self.size_mb = new_size_mb + + @Tracer.trace + def get_disk_size(self): + """Retrieve the virtual disk size (bytes) reported by Windows.""" + cmd = f"(Get-Disk -Number {self.disk_number}).Size" + result = ps_execute(cmd) + + disk_size = result.stdout.decode().strip() + if not disk_size.isdigit(): + raise CephTestException( + "Invalid disk size received: %s" % disk_size) + + return int(disk_size) + + @Tracer.trace + @retry_decorator(timeout=30) + def wait_for_disk_resize(self): + # After resizing the rbd image, the daemon is expected to receive + # the notification, inform the WNBD driver and then trigger a disk + # rescan (IOCTL_DISK_UPDATE_PROPERTIES). This might take a few seconds, + # so we'll need to do some polling. + disk_size = self.get_disk_size() + disk_size_mb = disk_size // (1 << 20) + + if disk_size_mb != self.size_mb: + raise CephTestException( + "The disk size hasn't been updated yet. Retrieved size: " + f"{disk_size_mb}MB. Expected size: {self.size_mb}MB.") + + +class RbdTest(object): + image: RbdImage + + requires_disk_online = False + requires_disk_write = False + + def __init__(self, + image_prefix: str = "cephTest-", + image_size_mb: int = 1024, + map_timeout: int = 60, + **kwargs): + self.image_size_mb = image_size_mb + self.image_name = image_prefix + str(uuid.uuid4()) + self.map_timeout = map_timeout + self.skip_enabling_disk = kwargs.get("skip_enabling_disk") + + @Tracer.trace + def initialize(self): + self.image = RbdImage.create( + self.image_name, + self.image_size_mb) + self.image.map(timeout=self.map_timeout) + + if not self.skip_enabling_disk: + if self.requires_disk_write: + self.image.set_writable() + + if self.requires_disk_online: + self.image.set_online() + + def run(self): + pass + + def cleanup(self): + if self.image: + self.image.cleanup() + + @classmethod + def print_results(cls, + title: str = "Test results", + description: str = None): + pass + + +class RbdFsTestMixin(object): + # Windows disks must be turned online before accessing partitions. 
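+    # Write access is also required so that the partition can be created and the volume formatted.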
+ requires_disk_online = True + requires_disk_write = True + + @Tracer.trace + def initialize(self): + super(RbdFsTestMixin, self).initialize() + + self.image.init_fs() + + def get_subpath(self, *args): + drive_path = f"{self.image.drive_letter}:\\" + return os.path.join(drive_path, *args) + + +class RbdFsTest(RbdFsTestMixin, RbdTest): + pass + + +class RbdFioTest(RbdTest): + data: typing.DefaultDict[str, typing.List[typing.Dict[str, str]]] = ( + collections.defaultdict(list)) + lock = threading.Lock() + + def __init__(self, + *args, + fio_size_mb: int = None, + iterations: int = 1, + workers: int = 1, + bs: str = "2M", + iodepth: int = 64, + op: str = "rw", + verify: str = "crc32c", + **kwargs): + + super(RbdFioTest, self).__init__(*args, **kwargs) + + self.fio_size_mb = fio_size_mb or self.image_size_mb + self.iterations = iterations + self.workers = workers + self.bs = bs + self.iodepth = iodepth + self.op = op + if op not in ("read", "randread"): + self.requires_disk_write = True + self.verify = verify + + def process_result(self, raw_fio_output: str): + result = json.loads(raw_fio_output) + with self.lock: + for job in result["jobs"]: + # Fio doesn't support trim on Windows + for op in ['read', 'write']: + if op in job: + self.data[op].append({ + 'error': job['error'], + 'io_bytes': job[op]['io_bytes'], + 'bw_bytes': job[op]['bw_bytes'], + 'runtime': job[op]['runtime'] / 1000, # seconds + 'total_ios': job[op]['short_ios'], + 'short_ios': job[op]['short_ios'], + 'dropped_ios': job[op]['short_ios'], + 'clat_ns_min': job[op]['clat_ns']['min'], + 'clat_ns_max': job[op]['clat_ns']['max'], + 'clat_ns_mean': job[op]['clat_ns']['mean'], + 'clat_ns_stddev': job[op]['clat_ns']['stddev'], + 'clat_ns_10': job[op].get('clat_ns', {}) + .get('percentile', {}) + .get('10.000000', 0), + 'clat_ns_90': job[op].get('clat_ns', {}) + .get('percentile', {}) + .get('90.000000', 0) + }) + + def _get_fio_path(self): + return self.image.path + + @Tracer.trace + def _run_fio(self, fio_size_mb=None): + LOG.info("Starting FIO test.") + cmd = [ + "fio", "--thread", "--output-format=json", + "--randrepeat=%d" % self.iterations, + "--direct=1", "--name=test", + "--bs=%s" % self.bs, "--iodepth=%s" % self.iodepth, + "--size=%sM" % (fio_size_mb or self.fio_size_mb), + "--readwrite=%s" % self.op, + "--numjobs=%s" % self.workers, + "--filename=%s" % self._get_fio_path(), + ] + if self.verify: + cmd += ["--verify=%s" % self.verify] + result = execute(*cmd) + LOG.info("Completed FIO test.") + self.process_result(result.stdout) + + @Tracer.trace + def run(self): + self._run_fio() + + @classmethod + def print_results(cls, + title: str = "Benchmark results", + description: str = None): + if description: + title = "%s (%s)" % (title, description) + + for op in cls.data.keys(): + op_title = "%s op=%s" % (title, op) + + table = prettytable.PrettyTable(title=op_title) + table.field_names = ["stat", "min", "max", "mean", + "median", "std_dev", + "max 90%", "min 90%", "total"] + table.float_format = ".4" + + op_data = cls.data[op] + + s = array_stats([float(i["bw_bytes"]) / 1000_000 for i in op_data]) + table.add_row(["bandwidth (MB/s)", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], 'N/A']) + + s = array_stats([float(i["runtime"]) for i in op_data]) + table.add_row(["duration (s)", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = array_stats([i["error"] for i in op_data]) + table.add_row(["errors", + s['min'], s['max'], s['mean'], 
+ s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = array_stats([i["short_ios"] for i in op_data]) + table.add_row(["incomplete IOs", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = array_stats([i["dropped_ios"] for i in op_data]) + table.add_row(["dropped IOs", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + clat_min = array_stats([i["clat_ns_min"] for i in op_data]) + clat_max = array_stats([i["clat_ns_max"] for i in op_data]) + clat_mean = array_stats([i["clat_ns_mean"] for i in op_data]) + clat_stddev = math.sqrt( + sum([float(i["clat_ns_stddev"]) ** 2 for i in op_data]) / len(op_data) + if len(op_data) else 0) + clat_10 = array_stats([i["clat_ns_10"] for i in op_data]) + clat_90 = array_stats([i["clat_ns_90"] for i in op_data]) + # For convenience, we'll convert it from ns to seconds. + table.add_row(["completion latency (s)", + clat_min['min'] / 1e+9, + clat_max['max'] / 1e+9, + clat_mean['mean'] / 1e+9, + clat_mean['median'] / 1e+9, + clat_stddev / 1e+9, + clat_10['mean'] / 1e+9, + clat_90['mean'] / 1e+9, + clat_mean['sum'] / 1e+9]) + print(table) + + +class RbdResizeFioTest(RbdFioTest): + """Image resize test. + + This test extends and then shrinks the image, performing FIO tests to + validate the resized image. + """ + + @Tracer.trace + def run(self): + self.image.resize(self.image_size_mb * 2) + self.image.wait_for_disk_resize() + + self._run_fio(fio_size_mb=self.image_size_mb * 2) + + self.image.resize(self.image_size_mb // 2, allow_shrink=True) + self.image.wait_for_disk_resize() + + self._run_fio(fio_size_mb=self.image_size_mb // 2) + + # Just like rbd-nbd, rbd-wnbd is masking out-of-bounds errors. + # For this reason, we don't have a negative test that writes + # passed the disk boundary. + + +class RbdFsFioTest(RbdFsTestMixin, RbdFioTest): + def initialize(self): + super(RbdFsFioTest, self).initialize() + + if not self.fio_size_mb or self.fio_size_mb == self.image_size_mb: + # Out of caution, we'll use up to 80% of the FS by default + self.fio_size_mb = int( + self.image.get_fs_capacity() * 0.8 / (1024 * 1024)) + + @staticmethod + def _fio_escape_path(path): + # FIO allows specifying multiple files separated by colon. + # This means that ":" has to be escaped, so + # F:\filename becomes F\:\filename. + return path.replace(":", "\\:") + + def _get_fio_path(self): + return self._fio_escape_path(self.get_subpath("test-fio")) + + +class RbdStampTest(RbdTest): + requires_disk_write = True + + _write_open_mode = "rb+" + _read_open_mode = "rb" + _expect_path_exists = True + + @staticmethod + def _rand_float(min_val: float, max_val: float): + return min_val + (random.random() * max_val - min_val) + + def _get_stamp(self): + buff = self.image_name.encode() + padding = 512 - len(buff) + buff += b'\0' * padding + return buff + + def _get_stamp_path(self): + return self.image.path + + @Tracer.trace + def _write_stamp(self): + with open(self._get_stamp_path(), self._write_open_mode) as disk: + stamp = self._get_stamp() + disk.write(stamp) + + @Tracer.trace + def _read_stamp(self): + with open(self._get_stamp_path(), self._read_open_mode) as disk: + return disk.read(len(self._get_stamp())) + + @Tracer.trace + def run(self): + if self._expect_path_exists: + # Wait up to 5 seconds and then check the disk, ensuring that + # nobody else wrote to it. 
This is particularly useful when + # running a high number of tests in parallel, ensuring that + # we aren't writing to the wrong disk. + time.sleep(self._rand_float(0, 5)) + + stamp = self._read_stamp() + assert stamp == b'\0' * len(self._get_stamp()) + + self._write_stamp() + + stamp = self._read_stamp() + assert stamp == self._get_stamp() + + +class RbdFsStampTest(RbdFsTestMixin, RbdStampTest): + _write_open_mode = "wb" + _expect_path_exists = False + + def _get_stamp_path(self): + return self.get_subpath("test-stamp") + + +class TestRunner(object): + def __init__(self, + test_cls: typing.Type[RbdTest], + test_params: dict = {}, + iterations: int = 1, + workers: int = 1, + stop_on_error: bool = False, + cleanup_on_error: bool = True): + self.test_cls = test_cls + self.test_params = test_params + self.iterations = iterations + self.workers = workers + self.executor = futures.ThreadPoolExecutor(max_workers=workers) + self.lock = threading.Lock() + self.completed = 0 + self.errors = 0 + self.stopped = False + self.stop_on_error = stop_on_error + self.cleanup_on_error = cleanup_on_error + + @Tracer.trace + def run(self): + tasks = [] + for i in range(self.iterations): + task = self.executor.submit(self.run_single_test) + tasks.append(task) + + LOG.info("Waiting for %d tests to complete.", self.iterations) + for task in tasks: + task.result() + + def run_single_test(self): + failed = False + if self.stopped: + return + + try: + test = self.test_cls(**self.test_params) + test.initialize() + test.run() + except KeyboardInterrupt: + LOG.warning("Received Ctrl-C.") + self.stopped = True + except Exception as ex: + failed = True + if self.stop_on_error: + self.stopped = True + with self.lock: + self.errors += 1 + LOG.exception( + "Test exception: %s. Total exceptions: %d", + ex, self.errors) + finally: + if not failed or self.cleanup_on_error: + try: + test.cleanup() + except KeyboardInterrupt: + LOG.warning("Received Ctrl-C.") + self.stopped = True + # Retry the cleanup + test.cleanup() + except Exception: + LOG.exception("Test cleanup failed.") + + with self.lock: + self.completed += 1 + LOG.info("Completed tests: %d. Pending: %d", + self.completed, self.iterations - self.completed) + + +TESTS: typing.Dict[str, typing.Type[RbdTest]] = { + 'RbdTest': RbdTest, + 'RbdFioTest': RbdFioTest, + 'RbdResizeFioTest': RbdResizeFioTest, + 'RbdStampTest': RbdStampTest, + # FS tests + 'RbdFsTest': RbdFsTest, + 'RbdFsFioTest': RbdFsFioTest, + 'RbdFsStampTest': RbdFsStampTest, +} + +if __name__ == '__main__': + args = parser.parse_args() + + log_level = logging.WARNING + if args.verbose: + log_level = logging.INFO + if args.debug: + log_level = logging.DEBUG + setup_logging(log_level) + + test_params = dict( + image_size_mb=args.image_size_mb, + image_prefix=args.image_prefix, + bs=args.bs, + op=args.op, + verify=args.fio_verify, + iodepth=args.fio_depth, + map_timeout=args.map_timeout, + skip_enabling_disk=args.skip_enabling_disk, + ) + + try: + test_cls = TESTS[args.test_name] + except KeyError: + raise CephTestException("Unkown test: {}".format(args.test_name)) + + runner = TestRunner( + test_cls, + test_params=test_params, + iterations=args.iterations, + workers=args.concurrency, + stop_on_error=args.stop_on_error, + cleanup_on_error=not args.skip_cleanup_on_error) + runner.run() + + Tracer.print_results() + test_cls.print_results( + description="count: %d, concurrency: %d" % + (args.iterations, args.concurrency)) + + assert runner.errors == 0, f"encountered {runner.errors} error(s)." |
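For reference, the runner above can also be driven directly from Python. The following is a minimal sketch, assuming it runs inside test_rbd_wnbd.py's namespace on a host where rbd, rbd-wnbd and fio are on PATH; the sizes and counts are illustrative and are not the values used by run-tests.ps1:

    # Run four RbdFioTest iterations on two worker threads against small images.
    runner = TestRunner(
        RbdFioTest,
        test_params=dict(image_size_mb=256, bs="1M", iodepth=32,
                         op="rw", verify="crc32c", map_timeout=60),
        iterations=4,
        workers=2)
    runner.run()

    # Print per-function timing statistics and the aggregated fio results.
    Tracer.print_results()
    RbdFioTest.print_results(description="count: 4, concurrency: 2")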